I'm trying to use Python's multiprocessing.Queue in my code:
import multiprocessing as mp
import datetime as dt

def function_to_get_from_q(Queue):
    #while not Queue.empty():
    print(Queue.get())

def collect(Queue):
    for i in range(10000):
        Queue.put([i, (dt.datetime.utcnow() + dt.timedelta(hours=5, minutes=30)).strftime('%H:%M:%S')])

if __name__ == "__main__":
    Q = mp.Queue()
    process1 = mp.Process(target=collect, args=(Q,))
    process2 = mp.Process(target=function_to_get_from_q, args=(Q,))
    process1.start()
    process2.start()
I'm expecting to get all the lists of numbers and the current time, but I'm getting only the first element ([0, '14:53:52']).
Where did I go wrong?
The while loop you have in your code is wrong because you don't want the function_to_get_from_q() process to quit every time it checks the queue and finds it empty. In the code below, a special sentinel value is put() into the queue to indicate that it's the last item. (Note that the := walrus operator used below requires Python 3.8+.)
import multiprocessing as mp
import datetime as dt

SENTINEL = 'stop'

def function_to_get_from_q(queue):
    while (value := queue.get()) != SENTINEL:
        print(value)

def collect(queue):
    for i in range(10000):
        t = dt.datetime.utcnow() + dt.timedelta(hours=5, minutes=30)
        queue.put([i, t.strftime('%H:%M:%S')])
    queue.put(SENTINEL)  # Indicate end.

if __name__ == "__main__":
    queue = mp.Queue()
    process1 = mp.Process(target=collect, args=(queue,))
    process2 = mp.Process(target=function_to_get_from_q, args=(queue,))
    process1.start()
    process2.start()
    print('fini')
The solution using a special sentinel value offered by martineau, which I have upvoted, is one way. I just wanted to show a second method that uses a multiprocessing.JoinableQueue. In this case process2 would be a daemon process that will automatically terminate when the main process terminates, since it loops forever trying to get items from the queue:
import multiprocessing as mp
import datetime as dt

def function_to_get_from_q(queue):
    while True:
        value = queue.get()
        print(value)
        queue.task_done()

def collect(queue):
    for i in range(1000):
        t = dt.datetime.utcnow() + dt.timedelta(hours=5, minutes=30)
        queue.put([i, t.strftime('%H:%M:%S')])
    # Wait for all work placed on the queue to be completed:
    queue.join()

if __name__ == "__main__":
    queue = mp.JoinableQueue()
    process1 = mp.Process(target=collect, args=(queue,))
    # This needs to be a daemon process since it never terminates:
    process2 = mp.Process(target=function_to_get_from_q, args=(queue,), daemon=True)
    process1.start()
    process2.start()
    # We must explicitly wait for process1 to complete to ensure
    # that our daemon process is not prematurely terminated:
    process1.join()
    print('fini')
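Note that task_done() must be called exactly once for every item retrieved with get(); queue.join() only unblocks once the count of unfinished tasks drops back to zero.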
If you don't wish to use a daemon process, then we must explicitly terminate process2:
import multiprocessing as mp
import datetime as dt

def function_to_get_from_q(queue):
    while True:
        value = queue.get()
        print(value)
        queue.task_done()

def collect(queue):
    for i in range(1000):
        t = dt.datetime.utcnow() + dt.timedelta(hours=5, minutes=30)
        queue.put([i, t.strftime('%H:%M:%S')])
    # Wait for all work placed on the queue to be completed:
    queue.join()

if __name__ == "__main__":
    queue = mp.JoinableQueue()
    process1 = mp.Process(target=collect, args=(queue,))
    # process2 is not a daemon this time; we terminate it explicitly below:
    process2 = mp.Process(target=function_to_get_from_q, args=(queue,))
    process1.start()
    process2.start()
    # We must explicitly wait for process1 to complete so that
    # we know all work has been completed:
    process1.join()
    # Kill process2:
    process2.terminate()
    print('fini')
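Note that terminate() stops a process abruptly, and the multiprocessing docs warn that terminating a process while it uses a shared queue can leave the queue in a corrupted state. It is reasonable here only because queue.join() has already confirmed that every item was processed.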
I want to start 4 processes which put an integer in a queue when a counter is divisible by 100. At the same time another process continuously reads it and prints it. Please correct my code to run... I am getting an error: 'Queue' object is not iterable.
from multiprocessing import Lock, Process, Queue, current_process
import time
import queue

def doFirstjob(process_Queue):
    i = 0
    while True:
        if i % 100 == 0:
            process_Queue.put(i)
        else:
            i += 1

def doSecondjob(process_Queue):
    while(1):
        if not process_Queue.Empty:
            task = process_Queue.get()
            print("task: ", task)
        else:
            time.sleep(0.2)

def main():
    number_of_processes = 4
    process_Queue = Queue()
    processes = []
    process_Queue.put(1)
    q = Process(target=doSecondjob, args=(process_Queue))
    q.start()
    for w in range(number_of_processes):
        p = Process(target=doFirstjob, args=(process_Queue))
        processes.append(p)
        p.start()

if __name__ == '__main__':
    main()
You were getting the error because Process expects a tuple for args: args=(process_Queue) is just a parenthesized expression, whereas args=(process_Queue,) (note the trailing comma) is a one-element tuple.
Also, instead of Empty it should be empty(), called with parentheses, since it is a method.
Change the code to the below.
from multiprocessing import Process, Queue
import time

def doFirstjob(process_Queue):
    i = 0
    while True:
        print("foo")
        if i % 100 == 0:
            process_Queue.put(i)
        i += 1  # always advance the counter, or it would put 0 forever

def doSecondjob(process_Queue):
    while True:
        print("bar")
        if not process_Queue.empty():
            task = process_Queue.get()
            print("task: ", task)
        else:
            time.sleep(0.2)

def main():
    number_of_processes = 4
    process_Queue = Queue()
    processes = []
    process_Queue.put(1)
    q = Process(target=doSecondjob, args=(process_Queue,))
    q.start()
    for w in range(number_of_processes):
        p = Process(target=doFirstjob, args=(process_Queue,))
        processes.append(p)
        p.start()

if __name__ == '__main__':
    main()
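A quick way to see why the parentheses matter: without them you get the bound method object itself, which is always truthy, so the else branch can never be taken:

from multiprocessing import Queue

q = Queue()
print(bool(q.empty))  # True -- a bound method object is always truthy
print(q.empty())      # True -- the actual emptiness check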
I am following a tutorial from YouTube to learn multiprocessing:
from multiprocessing import Pool
import time

def f(n):
    sum = 0
    for x in range(1000):
        sum += x*x
    return sum

if __name__ == "__main__":
    t1 = time.time()
    p = Pool()
    result = p.map(f, range(10000))
    p.close()
    p.join()
    print("Pool took: ", time.time()-t1)
I am puzzled about p.close() and p.join(): once the processes are closed, don't they no longer exist? How can .join() be applied to them?
join() waits for the child processes to be killed; killed processes send a signal informing their parent that they are quite dead. close() doesn't kill any process: it just closes a pipe, which informs the readers of that pipe that there will be no more data coming through it. The workers then finish their outstanding tasks and exit on their own, which is what join() waits for.
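In other words, the required order is close() first, then join(); calling join() on a pool that is still running raises a ValueError. A minimal sketch of the sequence (square() is just a stand-in task):

from multiprocessing import Pool

def square(n):
    return n * n

if __name__ == "__main__":
    p = Pool(2)
    results = p.map(square, range(10))
    p.close()   # no new tasks may be submitted; workers exit once idle
    p.join()    # now legal: blocks until every worker process has exited
    print(results)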
I've read a number of answers here on Stack Overflow about Python multiprocessing, and I think this one is the most useful for my purposes: python multiprocessing queue implementation.
Here is what I'd like to do: poll the database for new work, put it in the queue and have 4 processes continuously do the work. What I'm unclear on is what happens when an item in the queue is done being processed. In the question above, the process terminates when the queue is empty. However, in my case, I'd just like to keep waiting until there is data in the queue. So do I just sleep and periodically check the queue? So my worker processes will never die? Is that good practice?
def mp_worker(queue):
    while True:
        if (queue.qsize() == 0):
            time.sleep(20)
        else:
            db_record = queue.get()
            process_file(db_record)

def mp_handler():
    num_workers = 4
    processes = [Process(target=mp_worker, args=(queue,)) for _ in range(num_workers)]
    for process in processes:
        process.start()
    for process in processes:
        process.join()

if __name__ == '__main__':
    db_conn = db.create_postgre_connection(DB_CONFIG)
    while True:
        db_records = db.retrieve_received_files(DB_CONN)
        if (len(db_records) > 0):
            for db_record in db_records:
                queue.put(db_record)
            mp_handler()
        else:
            time.sleep(20)
    db_conn.close()
Does it make sense?
Thanks.
Figured it out. Workers have to die, since otherwise they never return. But I start a new set of workers whenever there is data anyway, so that's not a problem. Updated code:
def mp_worker(queue):
    while queue.qsize() > 0:
        db_record = queue.get()
        process_file(db_record)

def mp_handler():
    num_workers = 4
    if (queue.qsize() < num_workers):
        num_workers = queue.qsize()
    processes = [Process(target=mp_worker, args=(queue,)) for _ in range(num_workers)]
    for process in processes:
        process.start()
    for process in processes:
        process.join()

if __name__ == '__main__':
    while True:
        db_records = db.retrieve_received_files(DB_CONN)
        print(db_records)
        if (len(db_records) > 0):
            for db_record in db_records:
                queue.put(db_record)
            mp_handler()
        else:
            time.sleep(20)
    DB_CONN.close()
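One caveat with this pattern: qsize() can change between the check and the get(), so a worker can see a positive size, lose the last item to a sibling, and then block forever in get(). A safer sketch of mp_worker (assuming Python 3, where the Empty exception lives in the standard queue module) drains with get_nowait() instead:

import queue as std_queue  # only for the Empty exception

def mp_worker(q):
    while True:
        try:
            db_record = q.get_nowait()  # never blocks; raises Empty instead
        except std_queue.Empty:
            break  # queue drained, so let this worker return
        process_file(db_record)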
I am trying to use multiprocessing to process a very large number of files.
I tried to put the list of files into a queue and make 3 workers split the load with a common Queue data type. However, this does not seem to work. Probably I am misunderstanding the queue in the multiprocessing package.
Below is the example source code:
import multiprocessing
from multiprocessing import Queue

def worker(i, qu):
    """worker function"""
    while ~qu.empty():
        val = qu.get()
        print 'Worker:', i, ' start with file:', val
        j = 1
        for k in range(i*10000, (i+1)*10000):  # some time consuming process
            for j in range(i*10000, (i+1)*10000):
                j = j + k
        print 'Worker:', i, ' end with file:', val

if __name__ == '__main__':
    jobs = []
    qu = Queue()
    for j in range(100, 110):  # file numbers are from 100 to 110
        qu.put(j)
    for i in range(3):  # 3 processes
        p = multiprocessing.Process(target=worker, args=(i, qu))
        jobs.append(p)
        p.start()
    p.join()
Thanks for the comments.
I have come to learn that using Pool is the best solution:
import multiprocessing
import time

def worker(val):
    """worker function"""
    print 'Worker: start with file:', val
    time.sleep(1.1)
    print 'Worker: end with file:', val

if __name__ == '__main__':
    file_list = range(100, 110)
    p = multiprocessing.Pool(2)
    p.map(worker, file_list)
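As an aside, note that the while loop in the original code could never end on its own: ~ is Python's bitwise NOT, not boolean negation, so ~qu.empty() is truthy whether the queue is empty or not:

>>> ~True, ~False        # bitwise NOT: -2 and -1, both truthy
(-2, -1)
>>> not True, not False  # boolean negation is spelled "not"
(False, True)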
Three issues:
1) You are joining only on the 3rd process.
2) Why not use multiprocessing.Pool?
3) There is a race condition on qu.get().
For 1 & 3:
import multiprocessing
from Queue import Empty  # Python 2: the Empty exception lives in the Queue module

def worker(i, qu):
    """worker function"""
    while 1:
        try:
            val = qu.get(timeout=1)  # any finite timeout works here
        except Empty:
            break  # Yay, no race condition
        print 'Worker:', i, ' start with file:', val
        j = 1
        for k in range(i*10000, (i+1)*10000):  # some time consuming process
            for j in range(i*10000, (i+1)*10000):
                j = j + k
        print 'Worker:', i, ' end with file:', val

if __name__ == '__main__':
    jobs = []
    qu = multiprocessing.Queue()
    for j in range(100, 110):  # file numbers are from 100 to 110
        qu.put(j)
    for i in range(3):  # 3 processes
        p = multiprocessing.Process(target=worker, args=(i, qu))
        jobs.append(p)
        p.start()
    for p in jobs:  # <--- join on all processes
        p.join()
For 2), see the documentation for how to use the Pool:
https://docs.python.org/2/library/multiprocessing.html
You are joining only the last of your created processes. That means that if the first or the second process is still working while the third is finished, your main process carries on without waiting for the remaining processes to finish.
You should join them all in order to wait until they are finished:
for p in jobs:
    p.join()
Another thing: you should consider using qu.get_nowait() in order to get rid of the race condition between qu.empty() and qu.get().
For example:

import Queue  # Python 2: this module provides the Empty exception

try:
    while 1:
        message = qu.get_nowait()
        # do something fancy here
except Queue.Empty:
    pass
I hope that helps
I'm facing problems with the following example code:
from multiprocessing import Lock, Process, Queue, current_process

def worker(work_queue, done_queue):
    for item in iter(work_queue.get, 'STOP'):
        print("adding ", item, "to done queue")
        #this works: done_queue.put(item*10)
        done_queue.put(item*1000)  # this doesn't!
    return True

def main():
    workers = 4
    work_queue = Queue()
    done_queue = Queue()
    processes = []
    for x in range(10):
        work_queue.put("hi"+str(x))
    for w in range(workers):
        p = Process(target=worker, args=(work_queue, done_queue))
        p.start()
        processes.append(p)
        work_queue.put('STOP')
    for p in processes:
        p.join()
    done_queue.put('STOP')
    for item in iter(done_queue.get, 'STOP'):
        print(item)

if __name__ == '__main__':
    main()
When the done queue becomes big enough (a limit of about 64k, I think), the whole thing freezes without any further notice.
What is the general approach for such a situation when the queue becomes too big? Is there some way to remove elements on the fly once they are processed? The Python docs recommend removing the p.join() (joining a process that has put items on a queue deadlocks unless the queue has been drained first); in a real application, however, I cannot estimate when the processes have finished. Is there a simple solution for this problem besides infinite looping and using .get_nowait()?
This works for me with 3.4.0alpha4, 3.3, 3.2, 3.1 and 2.6. It tracebacks with 2.7 and 3.0. I pylint'd it, BTW. The key change is that the main process drains done_queue (counting one 'STOP' per worker) before join()ing the processes, so the workers' queue feeder threads never block on a full pipe:
#!/usr/local/cpython-3.3/bin/python

'''SSCCE for a queue deadlock'''

import sys
import multiprocessing

def worker(workerno, work_queue, done_queue):
    '''Worker function'''
    #reps = 10     # this worked for the OP
    #reps = 1000   # this worked for me
    reps = 10000   # this didn't
    for item in iter(work_queue.get, 'STOP'):
        print("adding", item, "to done queue")
        #this works: done_queue.put(item*10)
        for thing in item * reps:
            #print('workerno: {}, adding thing {}'.format(workerno, thing))
            done_queue.put(thing)
    done_queue.put('STOP')
    print('workerno: {0}, exited loop'.format(workerno))
    return True

def main():
    '''main function'''
    workers = 4
    work_queue = multiprocessing.Queue(maxsize=0)
    done_queue = multiprocessing.Queue(maxsize=0)
    processes = []
    for integer in range(10):
        work_queue.put("hi"+str(integer))
    for workerno in range(workers):
        dummy = workerno
        process = multiprocessing.Process(target=worker, args=(workerno, work_queue, done_queue))
        process.start()
        processes.append(process)
        work_queue.put('STOP')
    itemno = 0
    stops = 0
    while True:
        item = done_queue.get()
        itemno += 1
        sys.stdout.write('itemno {0}\r'.format(itemno))
        if item == 'STOP':
            stops += 1
            if stops == workers:
                break
    print('exited done_queue empty loop')
    for workerno, process in enumerate(processes):
        print('attempting process.join() of workerno {0}'.format(workerno))
        process.join()
    done_queue.put('STOP')

if __name__ == '__main__':
    main()
HTH