Why doesn't the producer-consumer stop? - python

I found an example implementing producer-consumer with two threads. But when I send a signal to the process to stop, it doesn't; it requires a second signal, e.g. SIGKILL, to stop completely. I thought the problem was with task_done(), but it seems not.
import time
import queue
import threading
import random

class Producer(threading.Thread):
    """
    Produces random integers and puts them on a queue.
    """
    def __init__(self, queue):
        """
        Constructor.
        @param queue queue synchronization object
        """
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """
        Thread run method. Puts random integers on the queue
        once per second.
        """
        while True:
            integer = random.randint(0, 256)
            self.queue.put(integer)
            print('%d put to queue by %s' % (integer, self.name))
            time.sleep(1)

class Consumer(threading.Thread):
    """
    Consumes random integers from a queue.
    """
    def __init__(self, queue):
        """
        Constructor.
        @param queue queue synchronization object
        """
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """
        Thread run method. Consumes integers from the queue.
        """
        while True:
            integer = self.queue.get()
            print('%d popped from list by %s' % (integer, self.name))
            self.queue.task_done()

def main():
    q = queue.Queue()
    t1 = Producer(q)
    t2 = Consumer(q)
    t1.start()
    t2.start()
    t1.join()
    t2.join()

if __name__ == '__main__':
    main()
Output:
210 put to queue by Thread-1
210 popped from list by Thread-2
Traceback (most recent call last):
  File "/Users/abc/PycharmProjects/untitled1/ssid.py", line 74, in <module>
    main()
  File "/Users/abc/PycharmProjects/untitled1/ssid.py", line 69, in main
    t1.join()
  File "/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/threading.py", line 1056, in join
    self._wait_for_tstate_lock()
  File "/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/threading.py", line 1072, in _wait_for_tstate_lock
    elif lock.acquire(block, timeout):
KeyboardInterrupt
244 put to queue by Thread-1
244 popped from list by Thread-2
85 put to queue by Thread-1
85 popped from list by Thread-2
160 put to queue by Thread-1
160 popped from list by Thread-2

That's because only the main thread gets stopped by the KeyboardInterrupt. You can observe this by letting your child threads print threading.enumerate(), which returns all alive threads plus the main thread.
import time
import queue
import threading
import random

class Producer(threading.Thread):
    def __init__(self, queue):
        super().__init__()
        self.queue = queue

    def run(self):
        while True:
            integer = random.randint(0, 256)
            self.queue.put(integer)
            print(f'{integer} put to queue by {self.name} '
                  f'threads: {threading.enumerate()}')
            time.sleep(1)

class Consumer(threading.Thread):
    def __init__(self, queue):
        super().__init__()
        self.queue = queue

    def run(self):
        while True:
            integer = self.queue.get()
            print(f'{integer} popped from list by {self.name} '
                  f'threads:{threading.enumerate()}')
            self.queue.task_done()

def main():
    q = queue.Queue()
    t1 = Producer(q)
    t2 = Consumer(q)
    # t1.daemon = True
    # t2.daemon = True
    t1.start()
    t2.start()
    t1.join()
    t2.join()

if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print('got KeyboardInterrupt')
Example Output with KeyboardInterrupt. Note the MainThread listed as 'stopped' after the KeyboardInterrupt:
97 put to queue by Thread-1 threads: [<_MainThread(MainThread, started
139810293606208)>, <Producer(Thread-1, started 139810250913536)>,
<Consumer(Thread-2, started 139810242520832)>]
97 popped from list by Thread-2 threads:[<_MainThread(MainThread, started
139810293606208)>, <Producer(Thread-1, started 139810250913536)>,
<Consumer(Thread-2, started 139810242520832)>]
got KeyboardInterrupt
92 put to queue by Thread-1 threads: [<_MainThread(MainThread, stopped
139810293606208)>, <Producer(Thread-1, started 139810250913536)>,
<Consumer(Thread-2, started 139810242520832)>]
92 popped from list by Thread-2 threads:[<_MainThread(MainThread, stopped
139810293606208)>, <Producer(Thread-1, started 139810250913536)>,
<Consumer(Thread-2, started 139810242520832)>]
You could make the child threads daemons to let them exit together with the main thread. But that should only be considered if your threads don't hold any resources:

Note: Daemon threads are abruptly stopped at shutdown. Their resources (such as open files, database transactions, etc.) may not be released properly. If you want your threads to stop gracefully, make them non-daemonic and use a suitable signalling mechanism such as an Event. (threading docs)

The better way is to catch the KeyboardInterrupt like in the code above and send a sentinel value over the queue to the child threads, letting them know they should finish and allowing them to clean up before exiting.
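A minimal sketch of that approach (the SENTINEL object and the stop_event are additions for illustration, not part of the original code): the main thread catches the KeyboardInterrupt and asks the producer to stop via an Event, and the producer forwards a sentinel through the queue so the consumer can clean up and exit:

import queue
import random
import threading

SENTINEL = object()  # unique marker telling the consumer to exit

class Producer(threading.Thread):
    def __init__(self, queue, stop_event):
        super().__init__()
        self.queue = queue
        self.stop_event = stop_event

    def run(self):
        # wait(1) sleeps like time.sleep(1) but wakes early when the event is set
        while not self.stop_event.wait(1):
            self.queue.put(random.randint(0, 256))
        self.queue.put(SENTINEL)  # last item: tell the consumer to finish

class Consumer(threading.Thread):
    def __init__(self, queue):
        super().__init__()
        self.queue = queue

    def run(self):
        while True:
            integer = self.queue.get()
            if integer is SENTINEL:
                break  # clean-up could happen here before exiting
            print(f'{integer} popped from list by {self.name}')

def main():
    q = queue.Queue()
    stop_event = threading.Event()
    t1 = Producer(q, stop_event)
    t2 = Consumer(q)
    t1.start()
    t2.start()
    try:
        t1.join()
        t2.join()
    except KeyboardInterrupt:
        stop_event.set()  # producer exits its loop and enqueues SENTINEL
        t1.join()
        t2.join()
        print('shut down cleanly')

if __name__ == '__main__':
    main()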

Related

Python queue stop when max reached

In the code below, I'm putting numbers on a queue from a thread, then retrieving and printing them from the main thread. It's supposed to print numbers from 0 to 99, but it stops at 9. The max size of the queue is 10.
from queue import Queue
from threading import Thread

def fetch(queue):
    for i in range(100):
        queue.put(i)

def main():
    queue = Queue(maxsize=10)
    Thread(target=fetch, args=(queue,)).start()
    while not queue.empty():
        item = queue.get()
        print(item)

main()
When I run this code I get:
0
1
2
3
4
5
6
7
8
9
The program doesn't stop; terminating it with Ctrl+C results in:
^CException ignored in: <module 'threading' from '/usr/lib/python3.10/threading.py'>
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1560, in _shutdown
    lock.acquire()
KeyboardInterrupt:
The queue.empty() method is notoriously unreliable due to the nature of threading. You should use a sentinel value to mark the end of the queue:
from threading import Thread
from queue import Queue
from time import sleep

def fetch(queue):
    sleep(1)
    for i in range(100):
        queue.put(i)
    queue.put(None)  # None is a sentinel value

def sink1(queue):
    while True:
        item = queue.get()
        if item is None:
            break
        print(item)

def main():
    queue = Queue(maxsize=10)
    t = Thread(target=fetch, args=(queue,))
    t.start()
    sink1(queue)

main()
print('Done')
I tried your code and it seemed to work for me. I then added sleep(1) to the fetch() function, and the program quits immediately because the main thread sees an empty queue before the producer has put anything on it.
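For illustration, a sketch of that failure mode (the range is shortened to 5 so the demo exits cleanly; with range(100) the delayed producer would eventually block forever on the full, never-drained queue):

from queue import Queue
from threading import Thread
from time import sleep

def fetch(queue):
    sleep(1)            # producer starts late...
    for i in range(5):  # shortened so the demo exits cleanly
        queue.put(i)

def main():
    queue = Queue(maxsize=10)
    Thread(target=fetch, args=(queue,)).start()
    while not queue.empty():  # ...so this is already False on the first check
        print(queue.get())

main()
print('Done')  # prints immediately; none of the numbers ever appear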

Stopping a python thread while the queue is not empty

I have some code which I hopefully boiled down to a correct MWE.
My goal is to stop the (multiple) threads if a list within the thread has a specific length.
In contrast to the MWE, it is not known in advance how many iterations are needed:
from queue import Queue
from threading import Thread

def is_even(n):
    return n % 2 == 0

class MT(Thread):
    def __init__(self, queue):
        super().__init__()
        self.queue = queue
        self.output = []

    def run(self):
        while len(self.output) < 4:
            task = self.queue.get()
            if is_even(task):
                self.output.append(task)
                self.queue.task_done()
            else:
                self.queue.task_done()
        print(self.output)
        print('done')

queue = Queue(10)
threads = 1
thr = []
for th in range(threads):
    thr.append(MT(queue))
for th in thr:
    th.start()
for i in range(100):
    queue.put(i)
queue.join()
for th in thr:
    th.join()
print('finished')
This code will never print 'finished'...
To quote the documentation:

Queue.join()
    Blocks until all items in the queue have been gotten and processed.
You have placed 100 items in the queue. The thread pulls 4 items, and completes. There are still 96 unprocessed items, and nobody is going to pull them. Therefore, queue.join() never returns.
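One possible fix (a sketch, not from the original answer): drop queue.join(), let the worker drain the queue until it sees a sentinel, and join the thread instead. The worker keeps consuming even after its output list is full, so every item is pulled:

from queue import Queue
from threading import Thread

SENTINEL = None  # assumed marker value

def is_even(n):
    return n % 2 == 0

class MT(Thread):
    def __init__(self, queue):
        super().__init__()
        self.queue = queue
        self.output = []

    def run(self):
        while True:
            task = self.queue.get()
            if task is SENTINEL:
                break
            if is_even(task) and len(self.output) < 4:
                self.output.append(task)
        print(self.output)
        print('done')

queue = Queue(10)
thread = MT(queue)
thread.start()
for i in range(100):
    queue.put(i)
queue.put(SENTINEL)  # always consumed, so the worker always exits
thread.join()        # join the thread, not the queue
print('finished')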

Multithreading freezes when using `thread.join()`

I am trying to set up 3 threads and execute 5 tasks from a queue. The idea is that the threads will first run the first 3 tasks at the same time, then 2 of the threads will finish the remaining 2. But the program seems to freeze. I couldn't detect anything wrong with it.
from multiprocessing import Manager
import threading
import time

global exitFlag
exitFlag = 0

class myThread(threading.Thread):
    def __init__(self, threadID, name, q):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.q = q

    def run(self):
        print("Starting " + self.name)
        process_data(self.name, self.q)
        print("Exiting " + self.name)

def process_data(threadName, q):
    global exitFlag
    while not exitFlag:
        if not workQueue.empty():
            data = q.get()
            print("%s processing %s" % (threadName, data))
        else:
            pass
        time.sleep(1)
    print('Nothing to Process')

threadList = ["Thread-1", "Thread-2", "Thread-3"]
nameList = ["One", "Two", "Three", "Four", "Five"]
queueLock = threading.Lock()
workQueue = Manager().Queue(10)
threads = []
threadID = 1

# create thread
for tName in threadList:
    thread = myThread(threadID, tName, workQueue)
    thread.start()
    threads.append(thread)
    threadID += 1

# fill up queue
queueLock.acquire()
for word in nameList:
    workQueue.put(word)
queueLock.release()

# wait queue clear
while not workQueue.empty():
    pass

# notify thread exit
exitFlag = 1

# wait for all threads to finish
for t in threads:
    t.join()

print("Exiting Main Thread")
I don't know what happened exactly, but after I remove the join() part, the program is able to run just fine. What I don't understand is that exitFlag is supposed to have sent out the signal when the queue is emptied. So it seems the signal was somehow not detected by process_data().
There are multiple issues with your code. First off, threads in CPython don't run Python code "at the same time" because of the global interpreter lock (GIL). A thread must hold the GIL to execute Python bytecode. By default a thread holds the GIL for up to 5 ms (Python 3.2+), unless it drops it earlier because it is doing blocking I/O. For parallel execution of Python code you would have to use multiprocessing.
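For illustration, a minimal sketch of moving CPU-bound work onto processes (busy_sum is an assumed stand-in for real work):

from multiprocessing import Pool

def busy_sum(n):
    # CPU-bound: with processes, each worker has its own interpreter
    # and its own GIL, so these calls can run in parallel.
    return sum(range(n))

if __name__ == '__main__':
    with Pool(3) as pool:
        print(pool.map(busy_sum, [10**6, 10**7, 10**8]))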
You also needlessly use a Manager.Queue instead of a queue.Queue. A Manager.Queue is a queue.Queue living on a separate manager process, so you've introduced a detour through IPC and memory copying for no benefit here.
The cause of your deadlock is that you have a race condition here:
if not workQueue.empty():
    data = q.get()
This is not an atomic operation. A thread can check workQueue.empty(), then drop the GIL, letting another thread drain the queue, and then proceed with data = q.get(), which will block forever if nothing is put on the queue again. Queue.empty() checks are a general anti-pattern and there is no need to use them. Use poison pills (sentinel values) to break out of a get-loop instead and to let the workers know they should exit. You need as many sentinel values as you have workers. Find more about iter(callable, sentinel) in the docs.
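As a quick illustration of the two-argument iter() form (a generic sketch, separate from the rewritten answer code below):

from queue import Queue

q = Queue()
for item in ('a', 'b', 'STOP'):
    q.put(item)

# iter(callable, sentinel) calls q.get() repeatedly and stops
# as soon as q.get() returns the sentinel value 'STOP'.
for item in iter(q.get, 'STOP'):
    print(item)  # prints 'a' then 'b'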
import time
from queue import Queue
from datetime import datetime
from threading import Thread, current_thread

SENTINEL = 'SENTINEL'

class myThread(Thread):
    def __init__(self, func, inqueue):
        super().__init__()
        self.func = func
        self._inqueue = inqueue

    def run(self):
        print(f"{datetime.now()} {current_thread().name} starting")
        self.func(self._inqueue)
        print(f"{datetime.now()} {current_thread().name} exiting")

def process_data(_inqueue):
    for data in iter(_inqueue.get, SENTINEL):
        print(f"{datetime.now()} {current_thread().name} "
              f"processing {data}")
        time.sleep(1)

if __name__ == '__main__':
    N_WORKERS = 3
    inqueue = Queue()
    input_data = ["One", "Two", "Three", "Four", "Five"]
    sentinels = [SENTINEL] * N_WORKERS  # one sentinel value per worker

    # enqueue input and sentinels
    for word in input_data + sentinels:
        inqueue.put(word)

    threads = [myThread(process_data, inqueue) for _ in range(N_WORKERS)]

    for t in threads:
        t.start()
    for t in threads:
        t.join()

    print(f"{datetime.now()} {current_thread().name} exiting")
Example Output:
2019-02-14 17:58:18.265208 Thread-1 starting
2019-02-14 17:58:18.265277 Thread-1 processing One
2019-02-14 17:58:18.265472 Thread-2 starting
2019-02-14 17:58:18.265542 Thread-2 processing Two
2019-02-14 17:58:18.265691 Thread-3 starting
2019-02-14 17:58:18.265793 Thread-3 processing Three
2019-02-14 17:58:19.266417 Thread-1 processing Four
2019-02-14 17:58:19.266632 Thread-2 processing Five
2019-02-14 17:58:19.266767 Thread-3 exiting
2019-02-14 17:58:20.267588 Thread-1 exiting
2019-02-14 17:58:20.267861 Thread-2 exiting
2019-02-14 17:58:20.267994 MainThread exiting
Process finished with exit code 0
If you don't insist on subclassing Thread, you could also just use multiprocessing.pool.ThreadPool (a.k.a. multiprocessing.dummy.Pool), which does the plumbing for you in the background.
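For comparison, a sketch of the same workload with ThreadPool; map() distributes the items over the pool's worker threads and handles start-up, shutdown, and joining internally:

import time
from multiprocessing.pool import ThreadPool
from threading import current_thread

def process_data(data):
    print(f"{current_thread().name} processing {data}")
    time.sleep(1)

if __name__ == '__main__':
    with ThreadPool(3) as pool:
        pool.map(process_data, ["One", "Two", "Three", "Four", "Five"])
    print("all done")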

Multi-threading in Python

I am facing some issues while implementing multi-threading in Python. The issue is very specific to my use case. Having gone through numerous posts on the subject, I deployed the most widely suggested/used method for doing so.
I start by defining my thread class as follows.
class myThread(Thread):
    def __init__(self, graphobj, q):
        Thread.__init__(self)
        self.graphobj = graphobj
        self.q = q

    def run(self):
        improcess(self.graphobj, self.q)
After that, I define the function that does all the processing required.
def improcess(graphobj, q):
    while not exitFlag:
        queueLock.acquire()
        if not q.empty():
            photo_id = q.get()
            queueLock.release()
            # Complete processing
        else:
            queueLock.release()
Now comes the part where I am stuck. I am able to run the code below exactly as it is without any issues. However, if I try to wrap it in a function like this, it breaks down.
def train_control(graphobj, photo_ids):
    workQueue = Queue(len(photo_ids))
    for i in range(1, 5):
        thread = myThread(graphobj=graphobj, q=workQueue)
        thread.start()
        threads.append(thread)
    queueLock.acquire()
    for photo_id in photo_ids:
        workQueue.put(photo_id)
    queueLock.release()
    while not workQueue.empty():
        pass
    exitFlag = 1
    for t in threads:
        t.join()
By "breaks down" I mean that the threads complete their work but don't stop waiting, i.e. exitFlag is never set to 1. I am unsure how to make this work.
Unfortunately the design of our systems is such that this piece of codes needs to be wrapped in a function which can be invoked by another module, so pulling it out is not really an option.
Looking forward to hearing from experts on this. Thanks in advance.
Edit: I forgot to mention this in the first draft: I globally initialize exitFlag and set its value to 0.
Below is the minimal, verifiable code snippet that I created to capture this problem:
import threading
import Queue

globvar01 = 5
globvar02 = 7
exitFlag = 0
globlist = []
threads = []
queueLock = threading.Lock()
workQueue = Queue.Queue(16)

class myThread(threading.Thread):
    def __init__(self, threadID, q):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.q = q

    def run(self):
        print "Starting thread " + str(self.threadID)
        myfunc(self.threadID, self.q)
        print "Exiting thread " + str(self.threadID)

def myfunc(threadID, q):
    while not exitFlag:
        queueLock.acquire()
        if not workQueue.empty():
            thoughtnum = q.get()
            queueLock.release()
            print "Processing thread " + str(threadID)
            if (thoughtnum < globvar01):
                globlist.append([1, 2, 3])
            elif (thoughtnum < globvar02):
                globlist.append([2, 3, 4])
        else:
            queueLock.release()

def controlfunc():
    for i in range(1, 5):
        thread = myThread(i, workQueue)
        thread.start()
        threads.append(thread)
    queueLock.acquire()
    for i in range(1, 11):
        workQueue.put(i)
    queueLock.release()
    # Wait for queue to empty
    while not workQueue.empty():
        pass
    exitFlag = 1
    # Wait for all threads to complete
    for t in threads:
        t.join()

print "Starting main thread"
controlfunc()
print "Exiting Main Thread"
From your MCVE, the only thing missing is:
while not workQueue.empty():
    pass
global exitFlag  # Need this or `exitFlag` is a local variable only.
exitFlag = 1
You could eliminate the queueLock and the exitFlag, however, by using a sentinel value in the Queue to shut down the worker threads, and it eliminates the spin-waiting. Worker threads will sleep on a q.get() and the main thread won't have to spin-wait for an empty queue:
#!python2
from __future__ import print_function
import threading
import Queue

debug = 1
console = threading.Lock()

def tprint(*args, **kwargs):
    if debug:
        name = threading.current_thread().getName()
        with console:
            print('{}: '.format(name), end='')
            print(*args, **kwargs)

globvar01 = 5
globvar02 = 7
globlist = []
threads = []
workQueue = Queue.Queue(16)

class myThread(threading.Thread):
    def __init__(self, threadID, q):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.q = q

    def run(self):
        tprint("Starting thread " + str(self.threadID))
        myfunc(self.threadID, self.q)
        tprint("Exiting thread " + str(self.threadID))

def myfunc(threadID, q):
    while True:
        thoughtnum = q.get()
        tprint("Processing thread " + str(threadID))
        if thoughtnum is None:
            break
        elif thoughtnum < globvar01:
            globlist.append([1, 2, 3])
        elif thoughtnum < globvar02:
            globlist.append([2, 3, 4])

def controlfunc():
    for i in range(1, 5):
        thread = myThread(i, workQueue)
        thread.start()
        threads.append(thread)
    for i in range(1, 11):
        workQueue.put(i)
    # One sentinel per worker thread shuts them all down
    for t in threads:
        workQueue.put(None)
    # Wait for all threads to complete
    for t in threads:
        t.join()

tprint("Starting main thread")
controlfunc()
tprint("Exiting Main Thread")
Output:
MainThread: Starting main thread
Thread-1: Starting thread 1
Thread-2: Starting thread 2
Thread-3: Starting thread 3
Thread-4: Starting thread 4
Thread-1: Processing thread 1
Thread-2: Processing thread 2
Thread-3: Processing thread 3
Thread-4: Processing thread 4
Thread-1: Processing thread 1
Thread-2: Processing thread 2
Thread-3: Processing thread 3
Thread-4: Processing thread 4
Thread-1: Processing thread 1
Thread-2: Processing thread 2
Thread-3: Processing thread 3
Thread-4: Processing thread 4
Thread-1: Processing thread 1
Thread-2: Processing thread 2
Thread-3: Exiting thread 3
Thread-4: Exiting thread 4
Thread-1: Exiting thread 1
Thread-2: Exiting thread 2
MainThread: Exiting Main Thread
You need to make sure exitFlag is set to 0 (False) before spawning any threads, otherwise improcess() won't do anything and the queue will remain non-empty.
This problem could happen if you have exitFlag as a global and it's not cleared from a previous run.

Why the threads are not released after all work is consumed from python Queue

I use a Queue to provide tasks for threads to work on. After all the work from the Queue is done, I see the threads are still alive, while I expected them to be released. Here is my code. You can see from the console that the number of active threads increases after each batch of tasks (each batch uses its own queue). How can I release the threads after a batch of work gets done?
import threading
import time
from Queue import Queue

class ThreadWorker(threading.Thread):
    def __init__(self, task_queue):
        threading.Thread.__init__(self)
        self.task_queue = task_queue

    def run(self):
        while True:
            work = self.task_queue.get()
            # do some work
            # do_work(work)
            time.sleep(0.1)
            self.task_queue.task_done()

def get_batch_work_done(works):
    task_queue = Queue()
    for _ in range(5):
        t = ThreadWorker(task_queue)
        t.setDaemon(True)
        t.start()
    for work in range(works):
        task_queue.put(work)
    task_queue.join()
    print 'get batch work done'
    print 'active threads count is {}'.format(threading.activeCount())

if __name__ == '__main__':
    for work_number in range(3):
        print 'start with {}'.format(work_number)
        get_batch_work_done(work_number)
Do a non-blocking read in a loop and use exception handling to terminate:
# Requires `from Queue import Queue, Empty` at the top of the module.
def run(self):
    try:
        while True:
            # Block for at most 0.1 s; raises Empty when no work arrives.
            work = self.task_queue.get(True, 0.1)
            # do some work
            # do_work(work)
            self.task_queue.task_done()  # keeps task_queue.join() working
    except Empty:
        print "goodbye"
