I am trying to run some code that works with Queue and threads.
Below is the code snippet:
import threading
from Queue import Queue

def function():
    results = []
    for i in range(68):
        return_list = function2(i)
        results.append(return_list)
        if return_list:
            print("True returned")
        else:
            print("Returned false")
    return results

def function2(i):
    print("In function 2 with: " + str(i))
    results = []
    working_queue = Queue()
    for _ in range(25):
        worker = threading.Thread(target=function3, args=(working_queue, results))
        worker.setDaemon(True)
        worker.start()
    for p in range(150):
        working_queue.put(p)
    working_queue.join()
    return results

def function3(working_queue, results):
    while True:
        try:
            current_item = working_queue.get()
            print("Processing:" + str(current_item))
            results.append("True")
        except Exception as e:
            print("An exception in function 3: " + str(e))
        finally:
            working_queue.task_done()

if __name__ == "__main__":
    results = function()
    print(str(results))
The code raises the following exception:
Traceback (most recent call last):
File "C:/pythonErrors/stackoverflow.py", line 45, in <module>
function()
File "C:/pythonErrors/stackoverflow.py", line 8, in function
return_list = function2(i)
File "C:/pythonErrors/stackoverflow.py", line 24, in function2
worker.start()
File "C:\Python27\lib\threading.py", line 736, in start
_start_new_thread(self.__bootstrap, ())
thread.error: can't start new thread
How can I delete the previously created and completed threads, so that new threads are not created on each for-loop iteration in function2()?
The requirement is to create only 25 threads for the whole process (currently 68 x 25 threads are being created in total).
Since the requirement was to work with 25 threads in total, thread creation should be part of the outermost function (i.e. function() here).
Below is the solution:
import threading
from Queue import Queue

def function():
    results = []
    working_queue = Queue()
    for _ in range(25):
        worker = threading.Thread(target=function3, args=(working_queue, results))
        worker.setDaemon(True)
        worker.start()
    for i in range(68):
        return_list = function2(i, working_queue)
        working_queue.join()
        results.append(return_list)
        if return_list:
            print("True returned")
        else:
            print("Returned false")
    return results

def function2(i, working_queue):
    print("In function 2 with: " + str(i))
    for p in range(150):
        working_queue.put(p)

def function3(working_queue, results):
    while True:
        try:
            current_item = working_queue.get()
            print("Processing:" + str(current_item))
            results.append("True")
        except Exception as e:
            print("An exception in function 3: " + str(e))
        finally:
            working_queue.task_done()

if __name__ == "__main__":
    results = function()
    print(str(results))
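As a follow-up, if the 25 workers ever need to exit cleanly instead of being discarded as daemon threads, one common pattern is to push one sentinel per worker after the real work is done. A minimal sketch of that idea (the None sentinel and the modified worker are my additions, not part of the solution above):

import threading
from Queue import Queue  # Python 2; on Python 3 use "from queue import Queue"

def worker(q, results):
    while True:
        item = q.get()
        try:
            if item is None:          # sentinel: no more work, stop this thread
                break
            results.append(item * 2)  # placeholder for the real processing
        finally:
            q.task_done()

q = Queue()
results = []
threads = [threading.Thread(target=worker, args=(q, results)) for _ in range(25)]
for t in threads:
    t.start()

for p in range(150):
    q.put(p)
q.join()                  # wait until all real items are processed

for _ in threads:
    q.put(None)           # one sentinel per worker so each loop exits
for t in threads:
    t.join()              # now the threads have actually finished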
Related
I need to use a pool to asynchronously parse results coming from an extraction method and send those results to a write queue.
I have tried this, but it seems to just run iteratively, one process after the other:
process_pool = Pool(processes=30, maxtasksperchild=1)
while True:
    filepath = read_queue.get(True)
    if filepath is None:
        break
    res = process_pool.apply_async(func=process.run, args=(filepath, final_path), callback=write_queue.put)
    results.append(res)

for result in results:
    result.wait()

process_pool.close()
process_pool.join()
I have also tried just waiting on each result, but that does the same thing as the above:
process_pool = Pool(processes=30, maxtasksperchild=1)
while True:
    filepath = read_queue.get(True)
    if filepath is None:
        break
    res = process_pool.apply_async(func=process.run, args=(filepath, final_path), callback=write_queue.put)
    res.wait()

process_pool.close()
process_pool.join()
I also tried just scheduling processes and letting the pool block itself if it's out of workers to spawn:
process_pool = Pool(processes=30, maxtasksperchild=1)
while True:
    filepath = read_queue.get(True)
    if filepath is None:
        break
    process_pool.apply_async(func=process.run, args=(filepath, final_path), callback=write_queue.put)

process_pool.close()
process_pool.join()
This doesn't work; it just runs through the items over and over without actually running any sort of function, and I'm not sure why. It seems I have to do something with the AsyncResult for the pool to actually schedule the process.
I need it to work like this:
When there is a result waiting in the queue, spawn a new process in the pool with that specific argument from the queue.
On callback, put that processed result in the write queue.
However, I can't seem to get it to work asynchronously correctly. It only works iteratively, because I have to do something with the result (a .get, a .wait, whatever) to actually get the task to schedule properly.
# write.py
import bz2
from pathlib import Path
from multiprocessing import Process, Queue
from queue import Empty

def write(p_list):
    outfile = Path('outfile.txt.bz2')
    for data in p_list:
        if Path.exists(outfile):
            mode = 'ab'
        else:
            mode = 'wb'
        with bz2.open(filename=outfile, mode=mode, compresslevel=9) as output:
            temp = (str(data) + '\n').encode('utf-8')
            output.write(temp)
    print('JSON files written', flush=True)

class Write(Process):
    def __init__(self, write_queue: Queue):
        Process.__init__(self)
        self.write_queue = write_queue

    def run(self):
        while True:
            try:
                p_list = self.write_queue.get(True, 900)
            except Empty:
                continue
            if p_list is None:
                break
            write(p_list)
-
# process.py
import time

def parse(data: int):
    global json_list
    time.sleep(.1)  # simulate parsing the json
    json_list.append(data)

def read(data: int):
    time.sleep(.1)
    parse(data)

def run(data: int):
    global json_list
    json_list = []
    read(data)
    return json_list

if __name__ == '__main__':
    global output_path, json_list
-
# main.py
import process
from write import Write
from multiprocessing import Pool, Queue

if __name__ == '__main__':
    read_queue = Queue()
    write_queue = Queue()

    write = Write(write_queue=write_queue)
    write.daemon = True
    write.start()

    for i in range(0, 1000000):
        read_queue.put(i)
    read_queue.put(None)

    process_pool = Pool(processes=30, maxtasksperchild=1)
    while True:
        data = read_queue.get(True)
        if data is None:
            break
        res = process_pool.apply_async(func=process.run, args=(data,), callback=write_queue.put)

    write_queue.put(None)
    process_pool.close()
    process_pool.join()
    write.join()

    print('process done')
So, the problem is that there is no problem. I'm just stupid. If you define a max of one task per worker, the processes schedule very quickly, and it just looks like nothing is happening (or maybe I'm the only one who thought that).
Here's a reasonable way to use an asynchronous process pool correctly within a while loop with a maxtasksperchild of 1:
import multiprocessing
import time

num_processes = 30

def func(elem):
    time.sleep(0.5)
    return elem

def callback(elem):
    # do something with the processed data
    pass

if __name__ == '__main__':
    queue = multiprocessing.Queue()
    for i in range(0, 10000):
        queue.put(i)
    queue.put(None)  # sentinel so the while loop below can terminate

    process_pool = multiprocessing.Pool(processes=num_processes, maxtasksperchild=1)
    results = []
    while True:
        data = queue.get(True)
        if data is None:
            break
        res = process_pool.apply_async(func=func, args=(data,), callback=callback)
        results.append(res)

    flag = False
    for i, res in enumerate(results):
        try:
            res.get(600)  # wait() never raises; get() raises TimeoutError on timeout
            # do some logging
            results[i] = None
        except multiprocessing.TimeoutError:
            flag = True
            # do some logging

    process_pool.close()
    if flag:
        process_pool.terminate()
    process_pool.join()
    # done!
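As a design note, the same feed-the-pool-from-a-queue pattern can often be written more simply with Pool.imap_unordered, which pulls arguments lazily and yields results as workers finish them. A minimal sketch with placeholder names:

import multiprocessing
import time

def run(data):
    time.sleep(0.5)     # stand-in for the real parsing work
    return data

if __name__ == '__main__':
    with multiprocessing.Pool(processes=30, maxtasksperchild=1) as pool:
        # imap_unordered consumes the iterable lazily and yields results
        # in whatever order the workers finish them.
        for result in pool.imap_unordered(run, range(10000)):
            pass  # e.g. hand result to a write queue here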
I cannot break out of the following function if an error occurs.
def run(self, max_workers=10):
    outputs = {}
    q = queue.Queue()
    for key, ground_truth in self.ground_truths.items():
        q.put((key, ground_truth))

    count = {}
    count['total_finish'] = 0
    start_time = time.time()

    def worker():
        while True:
            try:
                key, value = self.pred_on_one_image(q.get())
                outputs[key] = value
                count['total_finish'] += 1
            except:
                os._exit()
            finally:
                q.task_done()

    for i in range(max_workers):
        t = Thread(target=worker)
        t.daemon = True
        t.start()

    q.join()
    return outputs
I tried to use return, q.put(None), and sys.exit(), but none of them work; I have to press Ctrl+C manually to break out of it.
quit() and exit() usually work for me.
Use q.get(block=False) so that it raises the Empty exception if the queue is empty. Otherwise, the call will block until an item becomes available. The default value of block is True, which is why the get was blocking and no exception was raised.
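A minimal sketch of that idea, assuming the workers should simply stop once the queue has been drained (the Empty handling and the break are mine, not part of the original code):

import queue
import threading

def worker(q, outputs):
    while True:
        try:
            item = q.get(block=False)   # raises queue.Empty instead of blocking
        except queue.Empty:
            break                       # queue drained: let the thread finish
        try:
            outputs.append(item * 2)    # placeholder for the real per-item work
        finally:
            q.task_done()

q = queue.Queue()
for i in range(100):
    q.put(i)

outputs = []
threads = [threading.Thread(target=worker, args=(q, outputs)) for _ in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(len(outputs))  # 100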
I have a very simple threading example using Python 3.4.2. In this example I create five threads that each just return the string "Result", and I append each thread to a list titled threads. In another for loop, iterated five times, each thread is joined and x is printed. I am trying to print a result that should yield a list that looks like ['Result', 'Result', 'Result', 'Result', 'Result'], but instead the print command only yields the title of the thread and the fact that it is stopped. I'm obviously misunderstanding how to use threads in Python. If someone could provide an example of how to adequately complete this test case, I would be very grateful.
import threading

def Thread_Test():
    return ("Result")

number = 5
threads = []
for i in range(number):
    Result = threading.Thread(target=Thread_Test)
    threads.append(Result)
    Result.start()

for x in threads:
    x.join()
    print (x)
There is a difference between creating a thread and trying to get values out of a thread. Generally speaking, you should never try to use return in a thread to provide a value back to its caller. That is not how threads work. When you create a thread object, you have to figure out a different way to get any values calculated in the thread back to some other part of your program. The following is a simple example showing how values might be returned using a list.
#! /usr/bin/env python3
import threading

def main():
    # Define a few variables including storage for threads and values.
    threads_to_create = 5
    threads = []
    results = []
    # Create, start, and store all of the thread objects.
    for number in range(threads_to_create):
        # Bind the current value of number as a default argument; a bare
        # "lambda: results.append(number)" would see the loop's final value.
        thread = threading.Thread(target=lambda number=number: results.append(number))
        thread.start()
        threads.append(thread)
    # Ensure all threads are done and show the results.
    for thread in threads:
        thread.join()
    print(results)

if __name__ == '__main__':
    main()
If you absolutely insist that you must have the ability to return values from the target of a thread, it is possible to override some methods in threading.Thread using a child class to get the desired behavior. The following shows more advanced usage and demonstrates how multiple methods require a change in case someone desires to inherit from and override the run method of the new class. This code is provided for completeness and probably should not be used.
#! /usr/bin/env python3
import sys as _sys
import threading

def main():
    # Define a few variables including storage for threads.
    threads_to_create = 5
    threads = []
    # Create, start, and store all of the thread objects.
    for number in range(threads_to_create):
        thread = ThreadWithReturn(target=lambda number=number: number)
        thread.start()
        threads.append(thread)
    # Ensure all threads are done and show the results.
    print([thread.returned for thread in threads])

class ThreadWithReturn(threading.Thread):

    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, *, daemon=None):
        super().__init__(group, target, name, args, kwargs, daemon=daemon)
        self.__value = None

    def run(self):
        try:
            if self._target:
                return self._target(*self._args, **self._kwargs)
        finally:
            del self._target, self._args, self._kwargs

    def _bootstrap_inner(self):
        try:
            self._set_ident()
            self._set_tstate_lock()
            self._started.set()
            with threading._active_limbo_lock:
                threading._active[self._ident] = self
                del threading._limbo[self]
            if threading._trace_hook:
                _sys.settrace(threading._trace_hook)
            if threading._profile_hook:
                _sys.setprofile(threading._profile_hook)
            try:
                self.__value = True, self.run()
            except SystemExit:
                pass
            except:
                exc_type, exc_value, exc_tb = self._exc_info()
                self.__value = False, exc_value
                if _sys and _sys.stderr is not None:
                    print("Exception in thread %s:\n%s" %
                          (self.name, threading._format_exc()), file=_sys.stderr)
                elif self._stderr is not None:
                    try:
                        print((
                            "Exception in thread " + self.name +
                            " (most likely raised during interpreter shutdown):"), file=self._stderr)
                        print((
                            "Traceback (most recent call last):"), file=self._stderr)
                        while exc_tb:
                            print((
                                '  File "%s", line %s, in %s' %
                                (exc_tb.tb_frame.f_code.co_filename,
                                 exc_tb.tb_lineno,
                                 exc_tb.tb_frame.f_code.co_name)), file=self._stderr)
                            exc_tb = exc_tb.tb_next
                        print(("%s: %s" % (exc_type, exc_value)), file=self._stderr)
                    finally:
                        del exc_type, exc_value, exc_tb
            finally:
                pass
        finally:
            with threading._active_limbo_lock:
                try:
                    del threading._active[threading.get_ident()]
                except:
                    pass

    @property
    def returned(self):
        if self.__value is None:
            self.join()
        if self.__value is not None:
            valid, value = self.__value
            if valid:
                return value
            raise value

if __name__ == '__main__':
    main()
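For most cases, a simpler route to the same goal is the standard library's concurrent.futures.ThreadPoolExecutor (available since Python 3.2), which returns futures whose results can be collected directly. A minimal sketch:

#! /usr/bin/env python3
from concurrent.futures import ThreadPoolExecutor

def thread_test():
    return "Result"

def main():
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(thread_test) for _ in range(5)]
        results = [future.result() for future in futures]  # blocks until each finishes
    print(results)  # ['Result', 'Result', 'Result', 'Result', 'Result']

if __name__ == '__main__':
    main()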
Please find below a simple example of using a queue with threads:
import threading
import Queue
import timeit

q = Queue.Queue()
number = 5

t1 = timeit.default_timer()

# Step 1: for example, we are running multiple functions normally
result = []

def fun(x):
    result.append(x)
    return x

for i in range(number):
    fun(i)

print result, "# normal result"
print (timeit.default_timer() - t1)

t2 = timeit.default_timer()

# Step 2: by using threads and a queue
def fun_thrd(x, q):
    q.put(x)
    return

for i in range(number):
    t1 = threading.Thread(target=fun_thrd, args=(i, q))
    t1.start()
    t1.join()

thrd_result = []
while True:
    if not q.empty():
        thrd_result.append(q.get())
    else:
        break

print thrd_result, "# result with threads involved"
print (timeit.default_timer() - t2)

t3 = timeit.default_timer()

# Step 3: if you want threads to run without depending on the previous thread
threads = []

def fun_thrd_independent(x, q):
    q.put(x)
    return

def thread_indep(number):
    for i in range(number):
        t = threading.Thread(target=fun_thrd_independent, args=(i, q))
        t.start()
        threads.append(t)

thread_indep(5)

for j in threads:
    j.join()

thread_indep_result = []
while True:
    if not q.empty():
        thread_indep_result.append(q.get())
    else:
        break

print thread_indep_result  # result when threads are independent of each other
print (timeit.default_timer() - t3)
output:
[0, 1, 2, 3, 4] # normal result
3.50475311279e-05
[0, 1, 2, 3, 4] # result with threads involved
0.000977039337158
[0, 1, 2, 3, 4] result when threads are independent on each other
0.000933170318604
The timings will differ hugely according to the scale of the data.
Hope this helps. Thanks!
I have noticed that when I have many threads pulling elements from a queue, fewer elements are processed than the number I put into the queue. This is sporadic, but it seems to happen somewhere around half the time when I run the following code.
#!/bin/env python
from threading import Thread
import httplib, sys
from Queue import Queue
import time
import random

concurrent = 500
num_jobs = 500
results = {}

def doWork():
    while True:
        result = None
        try:
            result = curl(q.get())
        except Exception as e:
            print "Error when trying to get from queue: {0}".format(str(e))
        if results.has_key(result):
            results[result] += 1
        else:
            results[result] = 1
        try:
            q.task_done()
        except:
            print "Called task_done when all tasks were done"

def curl(ourl):
    result = 'all good'
    try:
        time.sleep(random.random() * 2)
    except Exception as e:
        result = "error: %s" % str(e)
    except:
        result = str(sys.exc_info()[0])
    finally:
        return result or "None"

print "\nRunning {0} jobs on {1} threads...".format(num_jobs, concurrent)
q = Queue()

for i in range(concurrent):
    t = Thread(target=doWork)
    t.daemon = True
    t.start()

for x in range(num_jobs):
    q.put("something")

try:
    q.join()
except KeyboardInterrupt:
    sys.exit(1)

total_responses = 0
for result in results:
    num_responses = results[result]
    print "{0}: {1} time(s)".format(result, num_responses)
    total_responses += num_responses

print "Number of elements processed: {0}".format(total_responses)
Tim Peters hit the nail on the head in the comments. The issue is that the results dict is updated from multiple threads and isn't protected by any sort of mutex. That allows something like this to happen:
thread A gets result: "all good"
thread A checks results[result]
thread A sees no such key
thread A suspends # <-- before counting its result
thread B gets result: "all good"
thread B checks results[result]
thread B sees no such key
thread B sets results['all good'] = 1
thread C ...
thread C sets results['all good'] = 2
thread D ...
thread A resumes # <-- and remembers it needs to count its result still
thread A sets results['all good'] = 1 # resetting previous work!
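One direct fix is to guard the shared dict with a lock so the check-and-increment becomes atomic. A minimal sketch (the lock is an addition of mine, not something from the original code):

import threading

results = {}
results_lock = threading.Lock()

def count(result):
    # Only one thread at a time may read-modify-write the dict.
    with results_lock:
        results[result] = results.get(result, 0) + 1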
A more typical workflow might have a results queue that the main thread is listening on.
workq = queue.Queue()
resultsq = queue.Queue()

make_work(into=workq)
do_work(source=workq, respond_on=resultsq)  # "from" is a keyword, so use another name
# do_work would do respond_on.put_nowait(result) instead of
# return result

results = {}
while True:
    try:
        result = resultsq.get(timeout=5)
    except queue.Empty:
        break  # maybe? You'd probably want to retry a few times
    results[result] = results.get(result, 0) + 1
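A self-contained, runnable version of that workflow might look like the sketch below; the worker body and job payloads are placeholders I made up to keep it short:

import queue
import threading
import time
import random

NUM_WORKERS = 50
NUM_JOBS = 500

workq = queue.Queue()
resultsq = queue.Queue()

def worker():
    while True:
        item = workq.get()
        try:
            time.sleep(random.random() * 0.01)  # stand-in for the real curl() work
            resultsq.put_nowait("all good")     # report the result instead of returning it
        finally:
            workq.task_done()

for _ in range(NUM_WORKERS):
    t = threading.Thread(target=worker)
    t.daemon = True
    t.start()

for _ in range(NUM_JOBS):
    workq.put("something")
workq.join()  # all jobs have been processed and their results queued

# Only the main thread touches the results dict, so no lock is needed.
results = {}
while not resultsq.empty():
    result = resultsq.get()
    results[result] = results.get(result, 0) + 1

print(results)  # {'all good': 500}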
I'm new to Python multithreading and wrote this simple program; here is my code along with the error message. Any ideas what is wrong? Thanks.
Using Python 2.7.
import time
import thread

def uploader(threadName):
    while True:
        time.sleep(5)
        print threadName

if __name__ == "__main__":
    numOfThreads = 5
    try:
        i = 0
        while i < numOfThreads:
            thread.start_new_thread(uploader, ('thread' + str(i)))
            i += 1
        print 'press any key to exit test'
        n = raw_input()
    except:
        print "Error: unable to start thread"
Unhandled exception in thread started by <pydev_monkey._NewThreadStartupWithTrace instance at 0x10e12c830>
Traceback (most recent call last):
File "/Applications/PyCharm CE.app/Contents/helpers/pydev/pydev_monkey.py", line 521, in __call__
return self.original_func(*self.args, **self.kwargs)
TypeError: uploader() takes exactly 1 argument (7 given)
thanks in advance,
Lin
The args of thread.start_new_thread need to be a tuple. Instead of this:
('thread' + str(i)) # results in a string
Try this for the args:
('thread' + str(i),) # a tuple with a single element
Incidentally, you should check out the threading module, which is a higher-level interface than thread.
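For example, the same uploader could be started with threading.Thread roughly like this (a sketch, not the asker's exact code):

import threading

for i in range(5):
    # uploader is the function from the question; note the trailing comma,
    # args must be a tuple even with a single element.
    t = threading.Thread(target=uploader, args=('thread' + str(i),))
    t.daemon = True
    t.start()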
In the following, threadName is now a global variable defined towards the top of the program; the variable is set just before each new thread is started, with the uploader function as the target.
Try this:
import time
import threading

threadName = ''

def uploader():
    while True:
        time.sleep(5)
        print threadName

if __name__ == "__main__":
    numOfThreads = 5
    try:
        i = 0
        while i < numOfThreads:
            threadName = 'thread' + str(i)
            newThread = threading.Thread(target=uploader)
            newThread.start()
            i += 1
        print 'press any key to exit test'
        n = raw_input()
    except:
        print "Error: unable to start thread"