I have this code that should put an event in a queue each time an external program (tcpdump) creates a *.pcap file in my directory.
My problem is that I always get an empty queue, although I do get the print from the process() function.
What am I doing wrong? Is the queue correctly defined and shared between the two classes?
EDIT
I think I now understand why I got an empty queue: I was printing the queue I had initialized before the Handler class filled it.
I modified my code and created two threads that should consume the same queue, but now the execution gets stuck on queue.put() and the ReadPcapFiles() thread stops running.
Here is the updated code:
import time
import pyshark
import concurrent.futures
import threading
import logging
from queue import Queue
from multiprocessing import Process
from watchdog.observers import Observer, api
from watchdog.events import PatternMatchingEventHandler
class Handler(PatternMatchingEventHandler):
patterns = ["*.pcap", "*.pcapng"]
def __init__(self, queue):
PatternMatchingEventHandler.__init__(self)
self.queue = queue
def process(self, event):
#print(f'event type: {event.event_type} path : {event.src_path}')
self.queue.put(event.src_path)
logging.info(f"Storing message: {self.queue.qsize()}")
print("Producer queue: ", list(self.queue.queue))
#self.queue.get()
def on_created(self, event):
self.process(event)
def StartWatcher(watchdogq, event):
path = 'C:\\...'
handler = Handler(watchdogq)
observer = Observer()
while not event.is_set():
observer.schedule(handler, path, recursive=False)
print("About to start observer")
observer.start()
try:
while True:
time.sleep(1)
except Exception as error:
observer.stop()
print("Error: " + str(error))
observer.join()
def ReadPcapFiles(consumerq, event):
while not event.is_set() or not consumerq.empty():
print("Consumer queue: ", consumerq.get())
#print("Consumer queue: ", list(consumerq.queue))
# pcapfile = pyshark.FileCapture(self.queue.get())
# for packet in pcapfile:
# countPacket +=1
if __name__ == '__main__':
format = "%(asctime)s: %(message)s"
logging.basicConfig(format=format, level=logging.INFO,datefmt="%H:%M:%S")
logging.getLogger().setLevel(logging.DEBUG)
queue = Queue()
event = threading.Event()
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
executor.submit(StartWatcher,queue, event)
executor.submit(ReadPcapFiles,queue, event)
time.sleep(0.1)
logging.info("Main: about to set event")
event.set()
OLD CODE:
import time
from queue import Queue
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
class Handler(PatternMatchingEventHandler):
patterns = ["*.pcap", "*.pcapng"]
def __init__(self, queue):
PatternMatchingEventHandler.__init__(self)
self.queue = queue
def process(self, event):
print(f'event type: {event.event_type} path : {event.src_path}')
self.queue.put(event.src_path)
def on_created(self, event):
self.process(event)
class Watcher():
def __init__(self, path):
self.queue = Queue()
self.observer = Observer()
self.handler = Handler(self.queue)
self.path = path
def start(self):
self.observer.schedule(self.handler, self.path, recursive=True)
self.observer.start()
try:
while True:
time.sleep(1)
self.queue.get()
print(list(self.queue.queue))
except Exception as error:
self.observer.stop()
print("Error: " + str(error))
self.observer.join()
if __name__ == '__main__':
watcher = Watcher('C:\\...')
watcher.start()
This is working for me (I got the main idea from this answer, thanks!), but note that I consider it a workaround, so if someone has a better solution or can better explain the reason for this behavior in Python, please do not hesitate to answer!
My guess is that I had two main problems:
- I was starting the Watchdog process inside another thread (and that was somehow blocking my queue-consuming thread).
- Python threading does not really run in parallel, and therefore starting an independent process was necessary.
Here is my code:
import time
import pyshark
import threading
import logging
import os
from queue import Queue
from multiprocessing import Process, Pool
from watchdog.observers import Observer, api
from watchdog.events import PatternMatchingEventHandler
from concurrent.futures import ThreadPoolExecutor
class Handler(PatternMatchingEventHandler):
patterns = ["*.pcap", "*.pcapng"]
def __init__(self, queue):
PatternMatchingEventHandler.__init__(self)
self.queue = queue
def process(self, event):
self.queue.put(event.src_path)
logging.info(f"Storing message: {self.queue.qsize()}")
print("Producer queue: ", list(self.queue.queue))
def on_created(self, event):
# wait until the file transfer is finished before processing it
file_size = -1
while file_size != os.path.getsize(event.src_path):
file_size = os.path.getsize(event.src_path)
time.sleep(1)
self.process(event)
def ConsumeQueue(consumerq):
while True:
if not consumerq.empty():
pool = Pool()
pool.apply_async(ReadPcapFiles, (consumerq.get(), ))
else:
time.sleep(1)
def ReadPcapFiles(get_event):
createdFile = get_event
print(f"This is my event in ReadPacapFile {createdFile}")
countPacket = 0
bandwidth = 0
pcapfile = pyshark.FileCapture(createdFile)
for packet in pcapfile:
countPacket +=1
bandwidth = bandwidth + int(packet.length)
print(f"Packet nr {countPacket}")
print(f"Byte per second {bandwidth}")
if __name__ == '__main__':
format = "%(asctime)s: %(message)s"
logging.basicConfig(format=format, level=logging.INFO,datefmt="%H:%M:%S")
logging.getLogger().setLevel(logging.DEBUG)
queue = Queue()
path = 'C:\\...'
worker = threading.Thread(target=ConsumeQueue, args=(queue, ), daemon=True)
print("About to start worker")
worker.start()
event_handler = Handler(queue)
observer = Observer()
observer.schedule(event_handler, path, recursive=False)
print("About to start observer")
observer.start()
try:
while True:
time.sleep(1)
except Exception as error:
observer.stop()
print("Error: " + str(error))
observer.join()
There is an excellent library, persist-queue, which provides concurrent access to the items in a queue. The queue is also persistent (file-based as well as database-based), so if the program crashes, you can still consume events from the point where it crashed.
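As a minimal sketch (my own illustration, not from the original answer), assuming the package is installed with pip install persist-queue and that "./pcap_queue" is just an example directory, the file-based queue can be used much like queue.Queue:
import persistqueue

# File-based FIFO queue; items survive a crash or restart of the program.
q = persistqueue.Queue("./pcap_queue")

q.put("capture_0001.pcap")   # producer side (e.g. the watchdog handler)
item = q.get()               # consumer side; blocks like queue.Queue.get()
print(item)
q.task_done()                # persist the fact that the item was consumed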
Related
Here is my script. When I run it in a shell it just hangs indefinitely, whereas I would expect it to terminate cleanly.
import logging
from logging import StreamHandler
import pymsteams
import queue
import threading
import atexit
class TeamsHandler(StreamHandler):
def __init__(self, channel_url):
super().__init__()
self.channel_url = channel_url
self.queue = queue.Queue()
self.thread = threading.Thread(target=self._worker)
self.thread.start()
atexit.register(self.queue.put, None)
def _worker(self):
while True:
record = self.queue.get()
if record is None:
break
msg = self.format(record)
print(msg)
def emit(self, record):
# enqueue the record to log and return control to the caller
self.queue.put(record)
if __name__ == "__main__":
my_logger = logging.getLogger('TestLogging')
my_logger.setLevel(logging.DEBUG)
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)
my_logger.addHandler(console_handler)
CHANNEL_ID = "not_used_anyway"
teamshandler = TeamsHandler(CHANNEL_ID)
teamshandler.setFormatter(logging.Formatter('%(levelname)s %(message)s'))
teamshandler.setLevel(logging.DEBUG)
my_logger.addHandler(teamshandler)
for i in range(1, 2):
my_logger.error(f"this is an error [{i}]")
my_logger.info(f"this is an info [{i}]")
The None record that should be sent by the atexit handler never arrives, so the thread stays open forever.
How can I make sure that the program exits cleanly by modifying the TeamsHandler only?
I got something working, have a look:
import queue
import threading
class Worker:
def __init__(self):
self.queue = queue.Queue()
threading.Thread(target=self._worker).start()
def _worker(self):
print("starting thread")
while True:
record = self.queue.get()
if record is None:
print("exiting")
break
print(f"Got message: {record}")
def emit(self, record):
self.queue.put(record)
class Wrapper:
def __init__(self):
self._worker = Worker()
def __del__(self):
print("Wrapper is being deleted")
self._worker.emit(None)
def emit(self, record):
self._worker.emit(record)
def main():
worker = Wrapper()
worker.emit("foo")
worker.emit("bar")
print("main exits")
if __name__ == "__main__":
main()
The point here is that when main exits, worker (which is an instance of Wrapper) goes out of scope, its __del__ method is called, and that sends a stop message to the real Worker object.
The result of running this code (the "Got message" lines can appear in different places, of course):
starting thread
main exits
Wrapper is being deleted
Got message: foo
Got message: bar
exiting
As pointed out by avysk, the problem is likely that atexit handlers fire too late, after the wait for non-daemon threads is already (supposed to be) done, which leads to a deadlock.
If I were you, I'd just add a call like TeamsHandler.finish() at the end of the if __name__ == '__main__' block, and modify TeamsHandler along these lines (untested):
_queues = []
class TeamsHandler(StreamHandler):
def __init__(self, channel_url):
super().__init__()
self.channel_url = channel_url
self.queue = queue.Queue()
self.thread = threading.Thread(target=self._worker)
self.thread.start()
_queues.append(self.queue)
def _worker(self):
while True:
record = self.queue.get()
if record is None:
break
msg = self.format(record)
print(msg)
def emit(self, record):
# enqueue the record to log and return control to the caller
self.queue.put(record)
@staticmethod
def finish():
for q in _queues:
q.put(None)
del _queues[:]
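For example (my addition, not part of the original answer), the end of the if __name__ == '__main__' block would then simply call the new static method after the logging calls:
for i in range(1, 2):
    my_logger.error(f"this is an error [{i}]")
    my_logger.info(f"this is an info [{i}]")

# tell the worker threads to drain their queues and stop, so the program can exit cleanly
TeamsHandler.finish()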
I have a requirement to create child processes, receive results using a Future, and then kill some of them when required.
For this I have subclassed the multiprocessing.Process class and return a Future object from the start() method.
The problem is that I am not able to receive the result in the cb() function, as it never gets called.
Please suggest whether this can be done in some other way, or whether something is missing in my current implementation.
Following is my current approach:
from multiprocessing import Process, Queue
from concurrent.futures import _base
import threading
from time import sleep
def foo(x,q):
print('result {}'.format(x*x))
result = x*x
sleep(5)
q.put(result)
class MyProcess(Process):
def __init__(self, target, args):
super().__init__()
self.target = target
self.args = args
self.f = _base.Future()
def run(self):
q = Queue()
worker_thread = threading.Thread(target=self.target, args=(self.args+ (q,)))
worker_thread.start()
r = q.get(block=True)
print('setting result {}'.format(r))
self.f.set_result(result=r)
print('done setting result')
def start(self):
f = _base.Future()
run_thread = threading.Thread(target=self.run)
run_thread.start()
return f
def cb(future):
print('received result in callback {}'.format(future))
def main():
p1 = MyProcess(target=foo, args=(2,))
f = p1.start()
f.add_done_callback(fn=cb)
sleep(10)
if __name__ == '__main__':
main()
print('Main thread dying')
In your start method you create a new Future which you then return. This is a different future than the one you set the result on; the returned future is simply never used at all. Try:
def start(self):
run_thread = threading.Thread(target=self.run)
run_thread.start()
return self.f
However, there are more problems with your code. You override the start method of the process, replacing it with execution on a worker thread, thereby actually bypassing multiprocessing. Also, you shouldn't import the _base module; it is an implementation detail, as the leading underscore indicates. Import concurrent.futures.Future instead (it's the same class, but through the public API).
This really uses multiprocessing:
from multiprocessing import Process, Queue
from concurrent.futures import Future
import threading
from time import sleep
def foo(x,q):
print('result {}'.format(x*x))
result = x*x
sleep(5)
q.put(result)
class MyProcess(Process):
def __init__(self, target, args):
super().__init__()
self.target = target
self.args = args
self.f = Future()
def run(self):
q = Queue()
worker_thread = threading.Thread(target=self.target, args=(self.args+ (q,)))
worker_thread.start()
r = q.get(block=True)
print('setting result {}'.format(r))
self.f.set_result(result=r)
print('done setting result')
def cb(future):
print('received result in callback {}: {}'.format(future, future.result()))
def main():
p1 = MyProcess(target=foo, args=(2,))
p1.f.add_done_callback(fn=cb)
p1.start()
p1.join()
sleep(10)
if __name__ == '__main__':
main()
print('Main thread dying')
And since you're already in a new process, spawning a worker thread to execute your target function shouldn't really be necessary; you could just execute the target function directly instead. Also, should the target function raise an exception, you wouldn't know about it: your callback would only be called on success. If you fix that, you're left with:
from multiprocessing import Process
from concurrent.futures import Future
import threading
from time import sleep
def foo(x):
print('result {}'.format(x*x))
result = x*x
sleep(5)
return result
class MyProcess(Process):
def __init__(self, target, args):
super().__init__()
self.target = target
self.args = args
self.f = Future()
def run(self):
try:
r = self.target(*self.args)
print('setting result {}'.format(r))
self.f.set_result(result=r)
print('done setting result')
except Exception as ex:
self.f.set_exception(ex)
def cb(future):
print('received result in callback {}: {}'.format(future, future.result()))
def main():
p1 = MyProcess(target=foo, args=(2,))
p1.f.add_done_callback(fn=cb)
p1.start()
p1.join()
sleep(10)
if __name__ == '__main__':
main()
print('Main thread dying')
This is basically what a ProcessPoolExecutor does.
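For comparison, here is a rough sketch (my illustration, not the asker's code) of the same flow using the standard ProcessPoolExecutor, which hands back a Future and supports done callbacks out of the box:
from concurrent.futures import ProcessPoolExecutor

def foo(x):
    return x * x

def cb(future):
    print('received result in callback {}: {}'.format(future, future.result()))

if __name__ == '__main__':
    with ProcessPoolExecutor() as executor:
        f = executor.submit(foo, 2)   # runs foo(2) in a child process
        f.add_done_callback(cb)       # invoked once the result (or an exception) is set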
I've implemented a threaded application using Python. During runtime I want to catch the Ctrl+C signal and exit the program. To do that I've registered a function called exit_gracefully, which also takes care of stopping the threads in a more controlled way. However, it does not seem to work; the handler is never called.
Here's the example I'm working with:
import Queue
import threading
import signal
import sys
import time
queue = Queue.Queue()
workers = list()
def callback(id, item):
print("{}: {}".format(id, item))
time.sleep(1)
def exit_gracefully(signum, frame):
print("Ctrl+C was pressed. Shutting threads down ...")
print("Stopping workers ...")
for worker in workers:
worker.stop()
sys.exit(1)
class ThreadedTask(threading.Thread):
def __init__(self, id, queue, callbacks):
threading.Thread.__init__(self)
self._stop_event = threading.Event()
self.id = str(id)
self.queue = queue
self.callbacks = callbacks
self._stopped = False
def run(self):
while not self.stopped():
item = self.queue.get()
for callback in self.callbacks:
callback(self.id, item)
self.queue.task_done()
def stop(self):
self._stop_event.set()
self._stopped = True
def stopped(self):
return self._stop_event.is_set() or self._stopped
def main(input_file, thread_count, callbacks):
print("Initializing queue ...")
queue = Queue.Queue()
print("Parsing '{}' ...".format(input_file))
with open(input_file) as f:
for line in f:
queue.put(line.replace("\n", ""))
print("Initializing {} threads ...".format(thread_count))
for id in range(thread_count):
worker = ThreadedTask(id, queue, callbacks)
worker.setDaemon(True)
workers.append(worker)
print("Starting {} threads ...".format(thread_count))
for worker in workers:
worker.start()
queue.join()
if __name__ == '__main__':
signal.signal(signal.SIGINT, exit_gracefully)
print("Starting main ...")
input_file = "list.txt"
thread_count = 10
callbacks = [
callback
]
main(input_file, thread_count, callbacks)
If you want to try the example above, you can generate some test data first:
seq 1 10000 > list.txt
Any help is appreciated!
Here's a solution that seems to work.
One issue is that Queue.get() will ignore SIGINT unless a timeout is set. That's documented here: https://bugs.python.org/issue1360.
Another issue is that Queue.join() also seems to ignore SIGINT. I worked around that by polling the queue in a loop to see if it's empty.
These issues appear to have been fixed in Python 3.
I also added a shared event that's used in the SIGINT handler to tell all the threads to shut down.
import Queue
import signal
import sys
import threading
import time
def callback(id, item):
print '{}: {}'.format(id, item)
time.sleep(1)
class ThreadedTask(threading.Thread):
def __init__(self, id, queue, run_event, callbacks):
super(ThreadedTask, self).__init__()
self.id = id
self.queue = queue
self.run_event = run_event
self.callbacks = callbacks
def run(self):
queue = self.queue
while not self.run_event.is_set():
try:
item = queue.get(timeout=0.1)
except Queue.Empty:
pass
else:
for callback in self.callbacks:
callback(self.id, item)
queue.task_done()
def main():
queue = Queue.Queue()
run_event = threading.Event()
workers = []
def stop():
run_event.set()
for worker in workers:
# Allow worker threads to shut down completely
worker.join()
def sigint_handler(signum, frame):
print '\nShutting down...'
stop()
sys.exit(0)
signal.signal(signal.SIGINT, sigint_handler)
callbacks = [callback]
for id in range(1, 11):
worker = ThreadedTask(id, queue, run_event, callbacks)
workers.append(worker)
for worker in workers:
worker.start()
with open('list.txt') as fp:
for line in fp:
line = line.strip()
queue.put(line)
while not queue.empty():
time.sleep(0.1)
# Update: Added this to gracefully shut down threads after all
# items are consumed from the queue.
stop()
if __name__ == '__main__':
main()
I have a Python tool that basically has this kind of setup:
main process (P1) -> spawns a process (P2) that starts a tcp connection
-> spawns a thread (T1) that starts a loop to receive
messages that are sent from P2 to P1 via a Queue (Q1)
server process (P2) -> spawns two threads (T2 and T3) that start loops to
receive messages that are sent from P1 to P2 via Queues (Q2 and Q3)
The problem I'm having is that when I stop my program (with Ctrl+C), it doesn't quit. The server process ends, but the main process just hangs there and I have to kill it.
The thread loop functions all look the same:
def _loop(self):
while self.running:
res = self.Q1.get()
if res is None:
break
self._handle_msg(res)
All threads are started as daemon:
t = Thread(target=self._loop)
t.setDaemon(True)
t.start()
In my main process, I use atexit, to perform clean-up tasks:
atexit.register(self.on_exit)
Those clean-up tasks are essentially the following:
1) Set self.running in P1 to False and send None to Q1, so that the thread T1 finishes
self.running = False
self.Q1.put(None)
2) Send a message to P2 via Q2 to inform that process that it is ending
self.Q2.put("stop")
3) In P2, react to the "stop" message and do what we did in P1
self.running = False
self.Q2.put(None)
self.Q3.put(None)
That is it, and in my understanding that should make everything shut down nicely, but it doesn't.
The main code of P1 also contains the following endless loop, because otherwise the program would end prematurely:
while running:
sleep(1)
Maybe that has something to do with the problem, but I cannot see why it should.
So what did I do wrong? Does my setup have major design flaws? Did I forget to shut down something?
EDIT
OK, I modified my code and managed to make it shut down correctly most of the time. Unfortunately, every now and then it still gets stuck.
I managed to write a small working example of my code. To demonstrate what happens, simply start the script and then use Ctrl+C to stop it. The issue now usually appears if you press Ctrl+C as soon as possible after starting the tool.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import signal
import sys
import logging
from multiprocessing import Process, Queue
from threading import Thread
from time import sleep
logger = logging.getLogger("mepy-client")
class SocketClientProtocol(object):
def __init__(self, q_in, q_out, q_binary):
self.q_in = q_in
self.q_out = q_out
self.q_binary = q_binary
self.running = True
t = Thread(target=self._loop)
#t.setDaemon(True)
t.start()
t = Thread(target=self._loop_binary)
#t.setDaemon(True)
t.start()
def _loop(self):
print "start of loop 2"
while self.running:
res = self.q_in.get()
if res is None:
break
self._handle_msg(res)
print "end of loop 2"
def _loop_binary(self):
print "start of loop 3"
while self.running:
res = self.q_binary.get()
if res is None:
break
self._handle_binary(res)
print "end of loop 3"
def _handle_msg(self, msg):
msg_type = msg[0]
if msg_type == "stop2":
print "STOP RECEIVED"
self.running = False
self.q_in.put(None)
self.q_binary.put(None)
def _put_msg(self, msg):
self.q_out.put(msg)
def _handle_binary(self, data):
pass
def handle_element(self):
self._put_msg(["something"])
def run_twisted(q_in, q_out, q_binary):
s = SocketClientProtocol(q_in, q_out, q_binary)
while s.running:
sleep(2)
s.handle_element()
class MediatorSender(object):
def __init__(self):
self.q_in = None
self.q_out = None
self.q_binary = None
self.p = None
self.running = False
def start(self):
if self.running:
return
self.running = True
self.q_in = Queue()
self.q_out = Queue()
self.q_binary = Queue()
print "!!!!START"
self.p = Process(target=run_twisted, args=(self.q_in, self.q_out, self.q_binary))
self.p.start()
t = Thread(target=self._loop)
#t.setDaemon(True)
t.start()
def stop(self):
print "!!!!STOP"
if not self.running:
return
print "STOP2"
self.running = False
self.q_out.put(None)
self.q_in.put(["stop2"])
#self.q_in.put(None)
#self.q_binary.put(None)
try:
if self.p and self.p.is_alive():
self.p.terminate()
except:
pass
def _loop(self):
print "start of loop 1"
while self.running:
res = self.q_out.get()
if res is None:
break
self._handle_msg(res)
print "end of loop 1"
def _handle_msg(self, msg):
self._put_msg(msg)
def _put_msg(self, msg):
self.q_in.put(msg)
def _put_binary(self, msg):
self.q_binary.put(msg)
def send_chunk(self, chunk):
self._put_binary(chunk)
running = True
def signal_handler(signal, frame):
global running
if running:
running = False
ms.stop()
else:
sys.exit(0)
if __name__ == "__main__":
signal.signal(signal.SIGINT, signal_handler)
ms = MediatorSender()
ms.start()
for i in range(100):
ms.send_chunk("some chunk of data")
while running:
sleep(1)
I think you're corrupting your multiprocessing.Queue by calling p.terminate() on the child process. The docs have a warning about this:
Warning: If this method is used when the associated process is using a
pipe or queue then the pipe or queue is liable to become corrupted and
may become unusable by other process. Similarly, if the process has
acquired a lock or semaphore etc. then terminating it is liable to
cause other processes to deadlock.
In some cases, it looks like p is terminating before your MediatorSender._loop method can consume the sentinel you loaded into it to let it know that it should exit.
Also, you're installing a signal handler that expects to work in the main process only, but SIGINT is actually received by both the parent and the child process, which means signal_handler gets called in both processes. This could result in ms.stop getting called twice, due to a race condition in the way you handle setting ms.running to False.
I would recommend just exploiting the fact that both processes receive SIGINT, and having both the parent and the child handle KeyboardInterrupt directly. That way, each shuts itself down cleanly, rather than having the parent terminate the child. The following code demonstrates that, and in my testing it never hung. I've simplified your code in a few places, but functionally it's exactly the same:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
from multiprocessing import Process, Queue
from threading import Thread
from time import sleep
logger = logging.getLogger("mepy-client")
class SocketClientProtocol(object):
def __init__(self, q_in, q_out, q_binary):
self.q_in = q_in
self.q_out = q_out
self.q_binary = q_binary
t = Thread(target=self._loop)
t.start()
t = Thread(target=self._loop_binary)
t.start()
def _loop(self):
print("start of loop 2")
for res in iter(self.q_in.get, None):
self._handle_msg(res)
print("end of loop 2")
def _loop_binary(self):
print("start of loop 3")
for res in iter(self.q_binary.get, None):
self._handle_binary(res)
print("end of loop 3")
def _handle_msg(self, msg):
msg_type = msg[0]
if msg_type == "stop2":
self.q_in.put(None)
self.q_binary.put(None)
def _put_msg(self, msg):
self.q_out.put(msg)
def stop(self):
print("STOP RECEIVED")
self.q_in.put(None)
self.q_binary.put(None)
def _handle_binary(self, data):
pass
def handle_element(self):
self._put_msg(["something"])
def run_twisted(q_in, q_out, q_binary):
s = SocketClientProtocol(q_in, q_out, q_binary)
try:
while True:
sleep(2)
s.handle_element()
except KeyboardInterrupt:
s.stop()
class MediatorSender(object):
def __init__(self):
self.q_in = None
self.q_out = None
self.q_binary = None
self.p = None
self.running = False
def start(self):
if self.running:
return
self.running = True
self.q_in = Queue()
self.q_out = Queue()
self.q_binary = Queue()
print("!!!!START")
self.p = Process(target=run_twisted,
args=(self.q_in, self.q_out, self.q_binary))
self.p.start()
self.loop = Thread(target=self._loop)
self.loop.start()
def stop(self):
print("!!!!STOP")
if not self.running:
return
print("STOP2")
self.running = False
self.q_out.put(None)
def _loop(self):
print("start of loop 1")
for res in iter(self.q_out.get, None):
self._handle_msg(res)
print("end of loop 1")
def _handle_msg(self, msg):
self._put_msg(msg)
def _put_msg(self, msg):
self.q_in.put(msg)
def _put_binary(self, msg):
self.q_binary.put(msg)
def send_chunk(self, chunk):
self._put_binary(chunk)
if __name__ == "__main__":
ms = MediatorSender()
try:
ms.start()
for i in range(100):
ms.send_chunk("some chunk of data")
# You actually have to join w/ a timeout in a loop on
# Python 2.7. If you just call join(), SIGINT won't be
# received by the main process, and the program will
# hang. This is a bug, and is fixed in Python 3.x.
while True:
ms.loop.join()
except KeyboardInterrupt:
ms.stop()
Edit:
If you prefer to use a signal handler rather than catching KeyboardInterrupt, you just need to make sure the child process uses its own signal handler, rather than inheriting the parent's:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import signal
import logging
from functools import partial
from multiprocessing import Process, Queue
from threading import Thread
from time import sleep
logger = logging.getLogger("mepy-client")
class SocketClientProtocol(object):
def __init__(self, q_in, q_out, q_binary):
self.q_in = q_in
self.q_out = q_out
self.q_binary = q_binary
self.running = True
t = Thread(target=self._loop)
t.start()
t = Thread(target=self._loop_binary)
t.start()
def _loop(self):
print("start of loop 2")
for res in iter(self.q_in.get, None):
self._handle_msg(res)
print("end of loop 2")
def _loop_binary(self):
print("start of loop 3")
for res in iter(self.q_binary.get, None):
self._handle_binary(res)
print("end of loop 3")
def _handle_msg(self, msg):
msg_type = msg[0]
if msg_type == "stop2":
self.q_in.put(None)
self.q_binary.put(None)
def _put_msg(self, msg):
self.q_out.put(msg)
def stop(self):
print("STOP RECEIVED")
self.running = False
self.q_in.put(None)
self.q_binary.put(None)
def _handle_binary(self, data):
pass
def handle_element(self):
self._put_msg(["something"])
def run_twisted(q_in, q_out, q_binary):
s = SocketClientProtocol(q_in, q_out, q_binary)
signal.signal(signal.SIGINT, partial(signal_handler_child, s))
while s.running:
sleep(2)
s.handle_element()
class MediatorSender(object):
def __init__(self):
self.q_in = None
self.q_out = None
self.q_binary = None
self.p = None
self.running = False
def start(self):
if self.running:
return
self.running = True
self.q_in = Queue()
self.q_out = Queue()
self.q_binary = Queue()
print("!!!!START")
self.p = Process(target=run_twisted,
args=(self.q_in, self.q_out, self.q_binary))
self.p.start()
self.loop = Thread(target=self._loop)
self.loop.start()
def stop(self):
print("!!!!STOP")
if not self.running:
return
print("STOP2")
self.running = False
self.q_out.put(None)
def _loop(self):
print("start of loop 1")
for res in iter(self.q_out.get, None):
self._handle_msg(res)
print("end of loop 1")
def _handle_msg(self, msg):
self._put_msg(msg)
def _put_msg(self, msg):
self.q_in.put(msg)
def _put_binary(self, msg):
self.q_binary.put(msg)
def send_chunk(self, chunk):
self._put_binary(chunk)
def signal_handler_main(ms, *args):
ms.stop()
def signal_handler_child(s, *args):
s.stop()
if __name__ == "__main__":
ms = MediatorSender()
signal.signal(signal.SIGINT, partial(signal_handler_main, ms))
ms.start()
for i in range(100):
ms.send_chunk("some chunk of data")
while ms.loop.is_alive():
ms.loop.join(9999999)
print('done main')
Maybe you should try to capture the SIGINT signal, which is generated by Ctrl+C, using signal.signal like this:
#!/usr/bin/env python
import signal
import sys
def signal_handler(signal, frame):
print('You pressed Ctrl+C!')
sys.exit(0)
signal.signal(signal.SIGINT, signal_handler)
print('Press Ctrl+C')
signal.pause()
Code stolen from here
This usually works for me if I am using the threading module; it will not work if you use the multiprocessing one, though. If you are running the script from the terminal, try running it in the background, like this:
python scriptFoo.py &
After you run the process it will output the PID, like this:
[1] 23107
Whenever you need to quit the script, just type kill followed by the script's PID, like this:
kill 23107
Hit Enter again and it should kill all the subprocesses and output this:
[1]+ Terminated python scriptFoo.py
As far as I know, you cannot kill all the subprocesses with Ctrl+C.
Below is the code I have that downloads various URLs, each in a separate thread. I was attempting to make some changes before implementing the thread pool, but with this change the queue ends up empty and the download never begins.
import Queue
import urllib2
import os
import utils as _fdUtils
import signal
import sys
import time
import threading
class ThreadedFetch(threading.Thread):
""" docstring for ThreadedFetch
"""
def __init__(self, queue, out_queue):
super(ThreadedFetch, self).__init__()
self.queueItems = queue.get()
self.__url = self.queueItems[0]
self.__saveTo = self.queueItems[1]
self.outQueue = out_queue
def run(self):
fileName = self.__url.split('/')[-1]
path = os.path.join(DESKTOP_PATH, fileName)
file_size = int(_fdUtils.getUrlSizeInBytes(self.__url))
while not STOP_REQUEST.isSet():
urlFh = urllib2.urlopen(self.__url)
_log.info("Download: %s" , fileName)
with open(path, 'wb') as fh:
file_size_dl = 0
block_sz = 8192
while True:
buffer = urlFh.read(block_sz)
if not buffer:
break
file_size_dl += len(buffer)
fh.write(buffer)
status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
status = status + chr(8)*(len(status)+1)
sys.stdout.write('%s\r' % status)
time.sleep(.05)
sys.stdout.flush()
if file_size_dl == file_size:
_log.info("Download Completed %s%% for file %s, saved to %s",
file_size_dl * 100. / file_size, fileName, DESKTOP_PATH)
Below is the main function that does the call and initiation:
def main(appName):
args = _fdUtils.getParser()
urls_saveTo = {}
# spawn a pool of threads, and pass them queue instance
# each url will be downloaded concurrently
for i in range(len(args.urls)):
t = ThreadedFetch(queue, out_queue)
t.daemon = True
t.start()
try:
for url in args.urls:
urls_saveTo[url] = args.saveTo
# urls_saveTo = {urls[0]: args.saveTo, urls[1]: args.saveTo, urls[2]: args.saveTo}
# populate queue with data
for item, value in urls_saveTo.iteritems():
queue.put([item, value])
# wait on the queue until everything has been processed
queue.join()
print '*** Done'
except (KeyboardInterrupt, SystemExit):
lgr.critical('! Received keyboard interrupt, quitting threads.')
You create the queue and then the first thread, which immediately tries to fetch an item from the still-empty queue. The ThreadedFetch.__init__() method isn't run asynchronously; only the run() method is, when you call start() on a thread object.
Store the queue in __init__() and move the get() into the run() method. That way you can create all the threads, each of them blocking in its own run(), giving you the chance to put items into the queue in the main thread.
class ThreadedFetch(threading.Thread):
def __init__(self, queue, out_queue):
super(ThreadedFetch, self).__init__()
self.queue = queue
self.outQueue = out_queue
def run(self):
url, save_to = self.queue.get()
# ...
For this example the queue is unnecessary, by the way, as every thread gets exactly one item from it. You could pass that item directly to the thread when creating the thread object:
class ThreadedFetch(threading.Thread):
def __init__(self, url, save_to, out_queue):
super(ThreadedFetch, self).__init__()
self.url = url
self.save_to = save_to
self.outQueue = out_queue
def run(self):
# ...
And when the ThreadedFetch class really just consists of the __init__() and run() methods, you may consider moving the run() method into a function and starting that asynchronously:
def fetch(url, save_to, out_queue):
# ...
# ...
def main():
# ...
thread = Thread(target=fetch, args=(url, save_to, out_queue))
thread.daemon = True
thread.start()