I have noticed that when I have many threads pulling elements from a queue, fewer elements are processed than the number I put into the queue. This is sporadic, but it seems to happen somewhere around half the time when I run the following code.
#!/bin/env python
from threading import Thread
import httplib, sys
from Queue import Queue
import time
import random

concurrent = 500
num_jobs = 500
results = {}

def doWork():
    while True:
        result = None
        try:
            result = curl(q.get())
        except Exception as e:
            print "Error when trying to get from queue: {0}".format(str(e))
        if results.has_key(result):
            results[result] += 1
        else:
            results[result] = 1
        try:
            q.task_done()
        except:
            print "Called task_done when all tasks were done"

def curl(ourl):
    result = 'all good'
    try:
        time.sleep(random.random() * 2)
    except Exception as e:
        result = "error: %s" % str(e)
    except:
        result = str(sys.exc_info()[0])
    finally:
        return result or "None"

print "\nRunning {0} jobs on {1} threads...".format(num_jobs, concurrent)

q = Queue()

for i in range(concurrent):
    t = Thread(target=doWork)
    t.daemon = True
    t.start()

for x in range(num_jobs):
    q.put("something")

try:
    q.join()
except KeyboardInterrupt:
    sys.exit(1)

total_responses = 0
for result in results:
    num_responses = results[result]
    print "{0}: {1} time(s)".format(result, num_responses)
    total_responses += num_responses

print "Number of elements processed: {0}".format(total_responses)
Tim Peters hit the nail on the head in the comments. The issue is that the results dictionary is updated from multiple threads and isn't protected by any sort of mutex. That allows something like this to happen:
thread A gets result: "all good"
thread A checks results[result]
thread A sees no such key
thread A suspends # <-- before counting its result
thread B gets result: "all good"
thread B checks results[result]
thread B sees no such key
thread B sets results['all good'] = 1
thread C ...
thread C sets results['all good'] = 2
thread D ...
thread A resumes # <-- and remembers it needs to count its result still
thread A sets results['all good'] = 1 # resetting previous work!
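The most direct fix is to make that check-and-increment atomic by sharing a lock between the worker threads. A minimal sketch of the idea (Python 3 naming here; the same approach works in the Python 2 code above):

import threading

results = {}
results_lock = threading.Lock()

def count_result(result):
    # The read-modify-write must happen as one step, so hold the lock
    # for the whole check-and-increment.
    with results_lock:
        results[result] = results.get(result, 0) + 1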
A more typical workflow might have a results queue that the main thread is listening on.
workq = queue.Queue()
resultsq = queue.Queue()

make_work(into=workq)
do_work(from_q=workq, respond_on=resultsq)
# do_work would do respond_on.put_nowait(result) instead of
# return result

results = {}
while True:
    try:
        result = resultsq.get(timeout=5)
    except queue.Empty:
        break  # maybe? You'd probably want to retry a few times
    results[result] = results.get(result, 0) + 1
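Fleshed out into something runnable, that pattern might look like the sketch below (Python 3; handle() is just a stand-in for the real per-item work, and a None sentinel tells each worker there is no more work):

import queue
import threading

NUM_WORKERS = 4

def handle(item):
    # stand-in for the real work
    return "all good"

def do_work(workq, resultsq):
    while True:
        item = workq.get()
        if item is None:          # sentinel: no more work
            break
        resultsq.put_nowait(handle(item))

workq = queue.Queue()
resultsq = queue.Queue()

workers = [threading.Thread(target=do_work, args=(workq, resultsq))
           for _ in range(NUM_WORKERS)]
for w in workers:
    w.start()

for n in range(500):
    workq.put("something")
for _ in workers:
    workq.put(None)               # one sentinel per worker
for w in workers:
    w.join()

# Only the main thread touches the results dict, so no lock is needed,
# and empty() is reliable here because all producers have finished.
results = {}
while not resultsq.empty():
    result = resultsq.get()
    results[result] = results.get(result, 0) + 1

print(results)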
Related
I cannot break out of the following function if an error occurs.
def run(self, max_workers=10):
    outputs = {}
    q = queue.Queue()
    for key, ground_truth in self.ground_truths.items():
        q.put((key, ground_truth))

    count = {}
    count['total_finish'] = 0
    start_time = time.time()

    def worker():
        while True:
            try:
                key, value = self.pred_on_one_image(q.get())
                outputs[key] = value
                count['total_finish'] += 1
            except:
                os._exit()
            finally:
                q.task_done()

    for i in range(max_workers):
        t = Thread(target=worker)
        t.daemon = True
        t.start()
    q.join()

    return outputs
I tried to use return, q.put(None), and sys.exit(), but none of them work; I have to press Ctrl+C manually to break out of it.
quit() and exit() usually work for me.
Set q.get(block=False) to raise the Empty exception if the queue is empty. Otherwise, get() will wait until an item is available in the queue. The default value of block is True, so the call was blocking and no exception was raised.
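Applied to a worker like the one above, that might look like the following self-contained sketch (Python 3; process_item stands in for self.pred_on_one_image from the question):

import queue
from threading import Thread

q = queue.Queue()
outputs = {}

def process_item(item):
    # stand-in for self.pred_on_one_image
    return item, item * 2

def worker():
    while True:
        try:
            item = q.get(block=False)   # raises queue.Empty instead of blocking forever
        except queue.Empty:
            return                      # nothing left to do: let this thread end
        try:
            key, value = process_item(item)
            outputs[key] = value        # each worker writes a distinct key, so plain assignment is fine here
        finally:
            q.task_done()               # always balance the successful get()

for i in range(20):
    q.put(i)

threads = [Thread(target=worker) for _ in range(4)]
for t in threads:
    t.start()
q.join()
for t in threads:
    t.join()
print(outputs)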
I have 2 scripts that start multiple processes. Right now I'm opening two different terminals and running python start.py in one and python start1.py in the other. How can I achieve this with one command, or by running one script?
Start.py:
import multiprocessing

# globals
my_queue = multiprocessing.Manager().Queue()  # queue to store our values
stop_event = multiprocessing.Event()          # flag which signals processes to stop
my_pool = None

def my_function(foo):
    print("starting %s" % foo)
    try:
        addnews.var(foo)
    except Exception, e:
        print str(e)

MAX_PROCESSES = 50
my_pool = multiprocessing.Pool(MAX_PROCESSES)
x = Var.objects.order_by('name').values('link')
for t in x:
    t = t.values()[0]
    my_pool.apply_async(my_function, args=(t,))
my_pool.close()
my_pool.join()
Start1.py:
import multiprocessing
import time
import Queue

# globals
MAX_PROCESSES = 50
my_queue = multiprocessing.Manager().Queue()  # queue to store our values
stop_event = multiprocessing.Event()          # flag which signals processes to stop
my_pool = None

def my_function(var):
    var.run_main(var)
    stop_event.set()

def var_scanner():
    # Since `t` could have unlimited size we'll put all `t` values in the queue
    while not stop_event.is_set():  # forever scan `values` for new items
        y = Var.objects.order_by('foo').values('foo__foo')
        for t in y:
            t = t.values()[0]
            my_queue.put(t)

try:
    var_scanner_process = multiprocessing.Process(target=var_scanner)
    var_scanner_process.start()
    my_pool = multiprocessing.Pool(MAX_PROCESSES)
    #while not stop_event.is_set():
    try:  # if queue isn't empty, get value from queue and create new process
        var = my_queue.get_nowait()  # getting value from queue
        p = multiprocessing.Process(target=my_function, args=(var,))
        p.start()
    except Queue.Empty:
        print "No more items in queue"
        time.sleep(1)
        #stop_event.set()
except KeyboardInterrupt as stop_test_exception:
    print(" CTRL+C pressed. Stopping test....")
    stop_event.set()
You can run the first script in the background on the same terminal, using the shell's & operator:
python start.py &
python start1.py
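If you'd rather drive it from a single Python script instead, one option is to launch both with subprocess and wait for them. A sketch, assuming the filenames start.py and start1.py used in the commands above:

import subprocess
import sys

# Launch both scripts with the current interpreter, then wait for both to exit.
procs = [subprocess.Popen([sys.executable, script])
         for script in ("start.py", "start1.py")]
for p in procs:
    p.wait()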
I am using Thread objects from the threading module for the first time and they don't seem to be freeing themselves up after the function runs. I am attempting to have a maximum of 5 threads running at once. Since one thread creates the next, there will be some overlap, but I'm seeing 2000+ threads running at once before I get the exception "can't start new thread".
from threading import Thread
import string

URLS = ['LONG LIST OF URLS HERE']
currentThread = 0
LASTTHREAD = len(URLS) - 1
MAXTHREADS = 5
threads = [None] * (LASTTHREAD + 1)

def getURL(threadName, currentThread):
    print('Thread Name = ' + threadName)
    print('URL = ' + str(URLS[currentThread]))
    if currentThread < LASTTHREAD:
        currentThread = currentThread + 1
        thisThread = currentThread
        try:
            threads[thisThread] = Thread(target = getURL, args = ('thread' + str(thisThread), currentThread, ))
            threads[thisThread].start()
            threads[thisThread].join()
        except Exception,e:
            print "Error: unable to start thread"
            print str(e)

for i in range(0, MAXTHREADS):
    currentThread = currentThread + 1
    try:
        threads[i] = Thread(target = getURL, args = ('thread' + str(i), currentThread, ))
        threads[i].start()
        threads[i].join()
    except Exception,e:
        print "Error: unable to start thread"
        print str(e)
I'm open to any other cleaning up I can do here as well since I'm pretty new to python and entirely new to threading. I'm just trying to get the threading set up properly at this point. Eventually this will scrape the URLS.
I'd suggest looking into a thread pool, and having the threads take tasks from a suitable shared data structure (e.g. a queue) rather than starting new threads all the time.
Depending on what it is you actually want to do, if you're using CPython (and if you don't know what I mean by CPython, you almost certainly are) you might not actually get any performance improvement from using threads, due to the global interpreter lock. So you might be better off looking into the multiprocessing module.
from Queue import Queue
from threading import Thread

def worker():
    while True:
        item = q.get()
        do_work(item)
        q.task_done()

def do_work(url):
    print "Processing URL:" + url

q = Queue()
for i in range(5):
    t = Thread(target=worker)
    t.daemon = True
    t.start()

for item in ['url_' + str(i) for i in range(2000)]:
    q.put(item)

q.join()  # block until all tasks are done
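If the work turns out to be CPU-bound and you do go the multiprocessing route mentioned above, the pool manages the job queue for you. A rough Python 3 sketch (scrape_url is a placeholder for whatever the real per-URL work is):

from multiprocessing import Pool

URLS = ['url_' + str(i) for i in range(2000)]

def scrape_url(url):
    # placeholder for the real scraping work
    return len(url)

if __name__ == '__main__':
    # 5 worker processes pull URLs from an internal queue until the list is exhausted.
    with Pool(processes=5) as pool:
        for url, result in zip(URLS, pool.map(scrape_url, URLS)):
            print(url, result)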
I'm doing a project involving data collection and logging. I have 2 threads running, a collection thread and a logging thread, both started in main. I'm trying to allow the program to be terminated gracefully when with Ctrl-C.
I'm using a threading.Event to signal to the threads to end their respective loops. It works fine to stop the sim_collectData method, but it doesn't seem to be properly stopping the logData thread. The "Collection terminated" print statement is never executed, and the program just stalls. (It doesn't end, just sits there).
The second while loop in logData is to make sure everything in the queue is logged. The goal is for Ctrl-C to stop the collection thread immediately, then allow the logging thread to finish emptying the queue, and only then fully terminate the program. (Right now, the data is just being printed out - eventually it's going to be logged to a database).
I don't understand why the second thread never terminates. I'm basing what I've done on this answer: Stopping a thread after a certain amount of time. What am I missing?
import Queue
import threading
import time
import random

def sim_collectData(input_queue, stop_event):
    ''' this provides some output simulating the serial
        data from the data logging hardware.
    '''
    n = 0
    while not stop_event.is_set():
        input_queue.put("DATA: <here are some random data> " + str(n))
        stop_event.wait(random.randint(0, 5))
        n += 1
    print "Terminating data collection..."
    return

def logData(input_queue, stop_event):
    n = 0
    # we *don't* want to loop based on queue size because the queue could
    # theoretically be empty while waiting on some data.
    while not stop_event.is_set():
        d = input_queue.get()
        if d.startswith("DATA:"):
            print d
            input_queue.task_done()
            n += 1
    # if the stop event is received and the previous loop terminates,
    # finish logging the rest of the items in the queue.
    print "Collection terminated. Logging remaining data to database..."
    while not input_queue.empty():
        d = input_queue.get()
        if d.startswith("DATA:"):
            print d
            input_queue.task_done()
            n += 1
    return

def main():
    input_queue = Queue.Queue()
    stop_event = threading.Event()  # used to signal termination to the threads

    print "Starting data collection thread...",
    collection_thread = threading.Thread(target=sim_collectData, args=(input_queue, stop_event))
    collection_thread.start()
    print "Done."

    print "Starting logging thread...",
    logging_thread = threading.Thread(target=logData, args=(input_queue, stop_event))
    logging_thread.start()
    print "Done."

    try:
        while True:
            time.sleep(10)
    except (KeyboardInterrupt, SystemExit):
        # stop data collection. Let the logging thread finish logging
        # everything in the queue
        stop_event.set()

main()
The problem is that your logger is waiting on d = input_queue.get() and will not check the event. One solution is to skip the event completely and invent a unique message that tells the logger to stop. When you get a signal, send that message to the queue.
import threading
import Queue
import random
import time

def sim_collectData(input_queue, stop_event):
    ''' this provides some output simulating the serial
        data from the data logging hardware.
    '''
    n = 0
    while not stop_event.is_set():
        input_queue.put("DATA: <here are some random data> " + str(n))
        stop_event.wait(random.randint(0, 5))
        n += 1
    print "Terminating data collection..."
    input_queue.put(None)
    return

def logData(input_queue):
    n = 0
    # we *don't* want to loop based on queue size because the queue could
    # theoretically be empty while waiting on some data.
    while True:
        d = input_queue.get()
        if d is None:
            input_queue.task_done()
            return
        if d.startswith("DATA:"):
            print d
            input_queue.task_done()
            n += 1

def main():
    input_queue = Queue.Queue()
    stop_event = threading.Event()  # used to signal termination to the threads

    print "Starting data collection thread...",
    collection_thread = threading.Thread(target=sim_collectData, args=(input_queue, stop_event))
    collection_thread.start()
    print "Done."

    print "Starting logging thread...",
    logging_thread = threading.Thread(target=logData, args=(input_queue,))
    logging_thread.start()
    print "Done."

    try:
        while True:
            time.sleep(10)
    except (KeyboardInterrupt, SystemExit):
        # stop data collection. Let the logging thread finish logging
        # everything in the queue
        stop_event.set()

main()
I'm not an expert in threading, but in your logData function the first d = input_queue.get() is blocking, i.e., if the queue is empty it will sit and wait forever until a queue message is received. This is likely why the logData thread never terminates: it's sitting there waiting forever for a queue message.
Refer to the Python docs to change this to a non-blocking queue read: use .get(False) or .get_nowait() - but either will require some exception handling for cases when the queue is empty.
You are calling a blocking get on your input_queue with no timeout. In either section of logData, if you call input_queue.get() and the queue is empty, it will block indefinitely, preventing the logging_thread from reaching completion.
To fix, you will want to call input_queue.get_nowait() or pass a timeout to input_queue.get().
Here is my suggestion:
def logData(input_queue, stop_event):
    n = 0
    while not stop_event.is_set():
        try:
            d = input_queue.get_nowait()
            if d.startswith("DATA:"):
                print "LOG: " + d
                n += 1
        except Queue.Empty:
            time.sleep(1)
    return
You are also signalling the threads to terminate, but not waiting for them to do so. Consider doing this in your main function.
try:
    while True:
        time.sleep(10)
except (KeyboardInterrupt, SystemExit):
    stop_event.set()
    collection_thread.join()
    logging_thread.join()
Based on tdelaney's answer, I created an iterator-based approach. The iterator exits when the termination message is encountered. I also added a counter of how many get calls are currently blocking, and a stop method which sends just as many termination messages. To prevent a race condition between incrementing and reading the counter, I set a stopping bit there. Furthermore, I don't use None as the termination message, because it cannot necessarily be compared to other data types when using a PriorityQueue.
There are two restrictions that I had no need to eliminate. For one, the stop method first waits until the queue is empty before shutting down the threads. The second restriction is that I did not add any code to make the queue reusable after stop. The latter can probably be added quite easily, while the former requires being careful about concurrency and the context in which the code is used.
You have to decide whether you want stop to also wait for all the termination messages to be consumed. I chose to put the necessary join there, but you may just remove it.
So this is the code:
import threading, queue
from functools import total_ordering

@total_ordering
class Final:
    def __repr__(self):
        return "∞"
    def __lt__(self, other):
        return False
    def __eq__(self, other):
        return isinstance(other, Final)

Infty = Final()

class IterQueue(queue.Queue):
    def __init__(self):
        self.lock = threading.Lock()
        self.stopped = False
        self.getters = 0
        super().__init__()
    def __iter__(self):
        return self
    def get(self):
        raise NotImplementedError("This queue may only be used as an iterator.")
    def __next__(self):
        with self.lock:
            if self.stopped:
                raise StopIteration
            self.getters += 1
        data = super().get()
        if data == Infty:
            self.task_done()
            raise StopIteration
        with self.lock:
            self.getters -= 1
        return data
    def stop(self):
        self.join()
        self.stopped = True
        with self.lock:
            for i in range(self.getters):
                self.put(Infty)
        self.join()

class IterPriorityQueue(IterQueue, queue.PriorityQueue):
    pass
Oh, and I wrote this in Python 3.2. So after backporting to Python 2, it looks like this:
import threading, Queue
from functools import total_ordering

@total_ordering
class Final:
    def __repr__(self):
        return "Infinity"
    def __lt__(self, other):
        return False
    def __eq__(self, other):
        return isinstance(other, Final)

Infty = Final()

class IterQueue(Queue.Queue, object):
    def __init__(self):
        self.lock = threading.Lock()
        self.stopped = False
        self.getters = 0
        super(IterQueue, self).__init__()
    def __iter__(self):
        return self
    def get(self):
        raise NotImplementedError("This queue may only be used as an iterator.")
    def next(self):
        with self.lock:
            if self.stopped:
                raise StopIteration
            self.getters += 1
        data = super(IterQueue, self).get()
        if data == Infty:
            self.task_done()
            raise StopIteration
        with self.lock:
            self.getters -= 1
        return data
    def stop(self):
        self.join()
        self.stopped = True
        with self.lock:
            for i in range(self.getters):
                self.put(Infty)
        self.join()

class IterPriorityQueue(IterQueue, Queue.PriorityQueue):
    pass
You would use it as:
import random
import time

def sim_collectData(input_queue, stop_event):
    ''' this provides some output simulating the serial
        data from the data logging hardware.
    '''
    n = 0
    while not stop_event.is_set():
        input_queue.put("DATA: <here are some random data> " + str(n))
        stop_event.wait(random.randint(0, 5))
        n += 1
    print "Terminating data collection..."
    return

def logData(input_queue):
    n = 0
    # we *don't* want to loop based on queue size because the queue could
    # theoretically be empty while waiting on some data.
    for d in input_queue:
        if d.startswith("DATA:"):
            print d
            input_queue.task_done()
            n += 1

def main():
    input_queue = IterQueue()
    stop_event = threading.Event()  # used to signal termination to the threads

    print "Starting data collection thread...",
    collection_thread = threading.Thread(target=sim_collectData, args=(input_queue, stop_event))
    collection_thread.start()
    print "Done."

    print "Starting logging thread...",
    logging_thread = threading.Thread(target=logData, args=(input_queue,))
    logging_thread.start()
    print "Done."

    try:
        while True:
            time.sleep(10)
    except (KeyboardInterrupt, SystemExit):
        # stop data collection. Let the logging thread finish logging
        # everything in the queue
        stop_event.set()
        input_queue.stop()

main()
I use the Python multiprocessing library for an algorithm in which I have many workers processing certain data and returning results to the parent process. I use one multiprocessing.Queue for passing jobs to the workers, and a second one to collect the results.
It all works pretty well, until a worker fails to process some chunk of data. In the simplified example below each worker has two phases:
initialization - can fail; in this case the worker should be destroyed
data processing - processing a chunk of data can fail; in this case the worker should skip this chunk and continue with the next data.
When either of these phases fails I get a deadlock after script completion. This code simulates my problem:
import multiprocessing as mp
import random

workers_count = 5
# Probability of failure, change to simulate failures
fail_init_p = 0.2
fail_job_p = 0.3


#========= Worker =========

def do_work(job_state, arg):
    if random.random() < fail_job_p:
        raise Exception("Job failed")
    return "job %d processed %d" % (job_state, arg)

def init(args):
    if random.random() < fail_init_p:
        raise Exception("Worker init failed")
    return args

def worker_function(args, jobs_queue, result_queue):
    # INIT
    # What to do when init() fails?
    try:
        state = init(args)
    except:
        print "!Worker %d init fail" % args
        return

    # DO WORK
    # Process data in the jobs queue
    for job in iter(jobs_queue.get, None):
        try:
            # Can throw an exception!
            result = do_work(state, job)
            result_queue.put(result)
        except:
            print "!Job %d failed, skip..." % job
        finally:
            jobs_queue.task_done()

    # Telling that we are done with processing stop token
    jobs_queue.task_done()


#========= Parent =========

jobs = mp.JoinableQueue()
results = mp.Queue()
for i in range(workers_count):
    mp.Process(target=worker_function, args=(i, jobs, results)).start()

# Populate jobs queue
results_to_expect = 0
for j in range(30):
    jobs.put(j)
    results_to_expect += 1

# Collecting the results
# What if some workers failed to process the job and we have
# less results than expected
for r in range(results_to_expect):
    result = results.get()
    print result

# Signal all workers to finish
for i in range(workers_count):
    jobs.put(None)

# Wait for them to finish
jobs.join()
I have two questions about this code:
When init() fails, how do I detect that a worker is invalid and avoid waiting for it to finish?
When do_work() fails, how do I notify the parent process that fewer results should be expected in the results queue?
Thank you for your help!
I changed your code slightly to make it work (see explanation below).
import multiprocessing as mp
import random

workers_count = 5
# Probability of failure, change to simulate failures
fail_init_p = 0.5
fail_job_p = 0.4


#========= Worker =========

def do_work(job_state, arg):
    if random.random() < fail_job_p:
        raise Exception("Job failed")
    return "job %d processed %d" % (job_state, arg)

def init(args):
    if random.random() < fail_init_p:
        raise Exception("Worker init failed")
    return args

def worker_function(args, jobs_queue, result_queue):
    # INIT
    # What to do when init() fails?
    try:
        state = init(args)
    except:
        print "!Worker %d init fail" % args
        result_queue.put('init failed')
        return

    # DO WORK
    # Process data in the jobs queue
    for job in iter(jobs_queue.get, None):
        try:
            # Can throw an exception!
            result = do_work(state, job)
            result_queue.put(result)
        except:
            print "!Job %d failed, skip..." % job
            result_queue.put('job failed')


#========= Parent =========

jobs = mp.Queue()
results = mp.Queue()
for i in range(workers_count):
    mp.Process(target=worker_function, args=(i, jobs, results)).start()

# Populate jobs queue
results_to_expect = 0
for j in range(30):
    jobs.put(j)
    results_to_expect += 1

init_failures = 0
job_failures = 0
successes = 0
while job_failures + successes < 30 and init_failures < workers_count:
    result = results.get()
    init_failures += int(result == 'init failed')
    job_failures += int(result == 'job failed')
    successes += int(result != 'init failed' and result != 'job failed')
    #print init_failures, job_failures, successes

for ii in range(workers_count):
    jobs.put(None)
My changes:
Changed jobs to be just a normal Queue (instead of a JoinableQueue).
Workers now communicate back the special result strings "init failed" and "job failed".
The master process monitors for those special results for as long as some jobs are still unaccounted for and at least one worker initialized successfully.
In the end, put "stop" requests (i.e. None jobs) for however many workers you have, regardless. Note that not all of these may be pulled from the queue (in case a worker failed to initialize). A sketch of an alternative, exception-based approach follows below.
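For comparison only (not part of the question's setup): with Python 3's concurrent.futures, each submitted job gets its own Future, so a failed job simply re-raises its exception from future.result() and there is no separate bookkeeping of how many results to expect. A rough sketch:

import concurrent.futures
import random

def do_work(arg):
    if random.random() < 0.3:
        raise Exception("Job failed")
    return "processed %d" % arg

if __name__ == '__main__':
    successes, failures = 0, 0
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as pool:
        futures = [pool.submit(do_work, j) for j in range(30)]
        for future in concurrent.futures.as_completed(futures):
            try:
                print(future.result())   # re-raises the worker's exception here
                successes += 1
            except Exception as exc:
                print("job failed: %s" % exc)
                failures += 1
    print("%d succeeded, %d failed" % (successes, failures))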
By the way, your original code was nice and easy to work with. The random probabilities bit is pretty cool.