I'm struggling to make a producer/consumer queue in Python 3. I can't get my consumer to wake up:
from multiprocessing import Process, Queue
import time

def consumer(q):
    while(True):
        data=q.get()
        if (data[0]==False):
            print("Killing")
            return
        print((data[1]))
        time.sleep(1)

maxitems=3
q = Queue(maxitems)
p = Process(target=consumer, args=(q,))
p.start()
for idx in range(0,10):
    q.put((True,idx))
    #Where idx would normally be a chunk of data
p.put((False,False))
p.join()
Output:
0
then it locks...
How do I get the consumer thread to wake up when I push data to it?
Launch:
python3.3 tryit.py
Built with:
[ebuild R ] dev-lang/python-3.3.5-r1:3.3::gentoo USE="gdbm ipv6 ncurses readline ssl threads xml -build -doc -examples -hardened -sqlite -tk -wininst" 0 KiB
p.put((False,False)) is wrong; the sentinel has to go onto the queue, so it should be q.put((False,False)). There is also some non-idiomatic Python, but otherwise it's fine.
from multiprocessing import Process, Queue
import time

def consumer(q):
    while True:
        data = q.get()
        if data[0] == False:
            print("Killing")
            break
        print(data[1])
        time.sleep(1)

maxitems = 3
q = Queue(maxitems)
p = Process(target=consumer, args=(q,))
p.start()
for idx in range(0, 10):
    q.put((True, idx))
    # where idx would normally be a chunk of data
q.put((False, False))
p.join()
For some reason this also needs to run from within the if __name__ == '__main__': guard:
from multiprocessing import Process, Queue
import time

def consumer(q):
    while(True):
        data=q.get()
        if (data[0]==False):
            print("Killing")
            return
        print((data[1]))
        time.sleep(1)

if __name__ == '__main__':
    maxitems=3
    q = Queue(maxitems)
    p = Process(target=consumer, args=(q,))
    p.start()
    for idx in range(0,10):
        q.put((True,idx))
        #Where idx would normally be a chunk of data
    q.put((False,False))
    p.join()
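A further variant (my own sketch, not from the original answer) keeps the same idea but uses None as the poison pill and the two-argument form of iter(), which shortens the consumer loop:

from multiprocessing import Process, Queue
import time

def consumer(q):
    # iter(q.get, None) calls q.get() repeatedly (blocking) until it returns None
    for data in iter(q.get, None):
        print(data)
        time.sleep(1)
    print("Killing")

if __name__ == '__main__':
    q = Queue(3)
    p = Process(target=consumer, args=(q,))
    p.start()
    for idx in range(10):
        q.put(idx)   # idx would normally be a chunk of data
    q.put(None)      # sentinel: tells the consumer to exit
    p.join()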
Related
from multiprocessing import Pool
from functools import partial
from time import sleep
import random
import string
import uuid
import os
import glob

def task_a(param1, param2, mydata):
    thread_id = str(uuid.uuid4().hex)  # this may not be robust enough to guarantee no collisions, address
    output_filename = ''.join([str(thread_id), '.txt'])
    # part 1 - create output file for task_b to use
    with open(output_filename, 'w') as outfile:
        for line in mydata:
            outfile.write(line)
    # part 2 - do some extra stuff (whilst task_b is running)
    sleep(5)
    print('Task A finished')
    return output_filename  # not interested in return val

def task_b(expected_num_files):
    processed_files = 0
    while processed_files < expected_num_files:
        print('I am task_b, waiting for {} files ({} so far)'.format(expected_num_files, processed_files))
        path_to_search = ''
        for filename in glob.iglob(path_to_search + '*.txt', recursive=True):
            print('Got file : {}'.format(filename))
            # would do something complicated here
            os.rename(filename, filename + '.done')
            processed_files += 1
        sleep(10)

if __name__ == '__main__':
    param1 = ''  # dummy variable, need to support in solution
    param2 = ''  # dummy variable, need to support in solution
    num_workers = 2
    full_data = [[random.choice(string.ascii_lowercase) for _ in range(5)] for _ in range(100)]
    print(full_data)
    for i in range(0, len(full_data), num_workers):
        print('Going to process {}'.format(full_data[i:i+num_workers]))
        p = Pool(num_workers)
        task_a_func = partial(task_a, param1, param2)
        results = p.map(task_a_func, full_data[i:i+num_workers])
        p.close()
        p.join()
        task_b(expected_num_files=num_workers)  # want this running sooner
        print('Iteration {} complete'.format(i))
        # want to wait for task_a's and task_b to finish
I'm having trouble scheduling these tasks to run concurrently.
task_a is a multiprocessing pool that produces an output file part way through its execution.
task_b MUST process the output files sequentially, though they can be in any order (i.e. as soon as they are available), WHILST task_a continues to run (it will no longer change the output file).
The next iteration must only start when all task_a's have completed AND task_b has completed.
The toy code I have posted obviously waits for the task_a's to fully complete before task_b is started (which is not what I want).
I have looked at multiprocessing / subprocess etc. but cannot find a way to launch both the pool and the single task_b process concurrently AND wait for BOTH to finish.
task_b is written as if it could be changed to an external script, but I am still stuck on how to manage the execution.
Should I effectively merge the code from task_b into task_a and somehow pass a flag to ensure one worker per pool 'runs the task_b code' via an if/else; at least then I would just be waiting on the pool to complete?
You can use an interprocess queue to communicate the filenames between task_a and task_b. Also, initializing the pool repeatedly inside the loop is wasteful and unnecessarily slow; it's better to initialize the pool once at the beginning.
from multiprocessing import Pool, Manager, Event
from functools import partial
from time import sleep
import random
import string
import uuid
import os
import glob

def task_a(param1, param2, queue, mydata):
    thread_id = str(uuid.uuid4().hex)
    output_filename = ''.join([str(thread_id), '.txt'])
    output_filename = 'data/' + output_filename
    with open(output_filename, 'w') as outfile:
        for line in mydata:
            outfile.write(line)
    print(f'{thread_id}: Task A file write complete for data {mydata}')
    queue.put(output_filename)
    print('Task A finished')

def task_b(queue, num_workers, data_size, event_task_b_done):
    print('Task b started!')
    processed_files = 0
    while True:
        filename = queue.get()
        if filename == 'QUIT':
            # Whenever you want task_b to quit, just push 'QUIT' to the queue
            print('Task b quitting')
            break
        print('Got file : {}'.format(filename))
        os.rename(filename, filename + '.done')
        processed_files += 1
        print(f'Have processed {processed_files} so far!')
        if (processed_files % num_workers == 0) or (processed_files == data_size):
            event_task_b_done.set()

if __name__ == '__main__':
    param1 = ''  # dummy variable, need to support in solution
    param2 = ''  # dummy variable, need to support in solution
    num_workers = 2
    data_size = 100
    full_data = [[random.choice(string.ascii_lowercase) for _ in range(5)] for _ in range(data_size)]

    mgr = Manager()
    queue = mgr.Queue()
    event_task_b_done = mgr.Event()

    # One extra worker for task b
    p = Pool(num_workers + 1)
    p.apply_async(task_b, args=(queue, num_workers, data_size, event_task_b_done))
    task_a_func = partial(task_a, param1, param2, queue)

    for i in range(0, len(full_data), num_workers):
        data = full_data[i:i+num_workers]
        print('Going to process {}'.format(data))
        p.map_async(task_a_func, full_data[i:i+num_workers])
        print(f'Waiting for task b to process all {num_workers} files...')
        event_task_b_done.wait()
        event_task_b_done.clear()
        print('Iteration {} complete'.format(i))

    queue.put('QUIT')
    p.close()
    p.join()
    exit(0)
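To make the structure of that code easier to see, here is a distilled sketch of the same pattern (my own, not from the answer; consumer/producer and the sample batches are illustrative): one Pool runs a long-lived consumer via apply_async, map-style async calls feed it items through a Manager queue, and a Manager Event acts as the per-iteration barrier.

from multiprocessing import Pool, Manager

def consumer(queue, done, batch_size):
    n = 0
    for item in iter(queue.get, 'QUIT'):   # loop until the 'QUIT' sentinel
        print('consumed', item)
        n += 1
        if n % batch_size == 0:
            done.set()                     # signal once a full batch is processed

def producer(queue, item):
    queue.put(item)                        # stand-in for task_a writing a file

if __name__ == '__main__':
    mgr = Manager()
    queue, done = mgr.Queue(), mgr.Event()
    batch_size = 2
    p = Pool(batch_size + 1)               # one extra slot for the consumer
    p.apply_async(consumer, (queue, done, batch_size))
    for batch in ([1, 2], [3, 4]):
        p.starmap_async(producer, [(queue, i) for i in batch])
        done.wait()                        # barrier: block until the batch is consumed
        done.clear()
    queue.put('QUIT')
    p.close()
    p.join()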
My training system consists of a bunch of processes that exchange data in the form of tensors, or lists/dictionaries of tensors. Memory sharing via the torch.multiprocessing module is a known technique to speed up similar workflows, yet for some reason it does not help with my app.
Here's a test script that emulates the system: we create a process and send tensors to it via a queue:
import sys
import time
import torch
from torch.multiprocessing import Process as TorchProcess
from torch.multiprocessing import Queue as TorchQueue

q = TorchQueue()

def torch_shared_mem_process():
    counter = 0
    while True:
        data = q.get()
        counter += 1
        if data is None:
            return
        print('Received data:', len(data), data, counter)

def test_mem_share(share_memory):
    p = TorchProcess(target=torch_shared_mem_process)
    p.start()

    def sample_data():
        return torch.rand([1000, 128, 72, 3], dtype=torch.float)

    start = time.time()
    n = 50
    for i in range(n):
        data = sample_data()
        for data_item in data:
            if share_memory:
                data_item.share_memory_()
        q.put(data)
        print(f'Progress {i}/{n}')
    q.put(None)
    p.join()
    print(f'Finished sending {n} tensor lists!')
    took_seconds = time.time() - start
    return took_seconds

def main():
    no_shared_memory = test_mem_share(share_memory=False)
    with_shared_memory = test_mem_share(share_memory=True)
    print(f'Took {no_shared_memory:.1f} s without shared memory.')
    print(f'Took {with_shared_memory:.1f} s with shared memory.')

if __name__ == '__main__':
    sys.exit(main())
Since I am using torch.multiprocessing, I expect the version with share_memory=True to be faster, but in reality it is marginally slower:
Took 10.2 s without shared memory.
Took 11.7 s with shared memory.
Did I misunderstand the way torch.multiprocessing.Queue works?
I believe torch.multiprocessing.Queue already moves tensors to shared memory when transporting them, so data_item.share_memory_() shouldn't speed things up any further.
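One way to check that (my own sketch, not from the thread) is to look at Tensor.is_shared() on both sides; if the queue already moves the storage to shared memory on put, the child should see True without any explicit share_memory_() call:

import torch
from torch.multiprocessing import Process, Queue

def child(q):
    t = q.get()
    # is_shared() reports whether the tensor's storage lives in shared memory
    print('child: is_shared() =', t.is_shared())

if __name__ == '__main__':
    q = Queue()
    p = Process(target=child, args=(q,))
    p.start()
    t = torch.rand(4, 4)
    print('parent before put: is_shared() =', t.is_shared())  # expected: False
    q.put(t)   # torch's queue moves the storage to shared memory when the tensor is sent
    p.join()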
Have you solved this issue? I modified your code a little and there is no change in transfer time when switching between torch.multiprocessing and multiprocessing:
import sys
import time
import torch
from torch.multiprocessing import Process
from torch.multiprocessing import Queue
# from multiprocessing import Process
# from multiprocessing import Queue

def torch_shared_mem_process(q):
    while True:
        data = q.get()
        if data is None:
            return
        print('Received data:', len(data), data)

def test_mem_share():
    q = Queue()
    p = Process(target=torch_shared_mem_process, args=(q,))
    p.start()

    def sample_data():
        return torch.zeros([100, 3, 1080, 1920], dtype=torch.float)

    data = sample_data()
    start = time.time()
    q.put(data)
    q.put(None)
    p.join()
    print(f'Finished sending tensor!')
    took_seconds = time.time() - start
    return took_seconds

def main():
    shared_memory = test_mem_share()
    print(f'Took {shared_memory:.1f} s with shared memory.')

if __name__ == '__main__':
    sys.exit(main())
I am using Python multiprocessing where a number of workers are doing a job and queueing their output to a listener. The listener then appends their outputs to a numpy array as below:
def listener(q):
    global data
    while 1:
        m = q.get()
        if m == 'kill':
            print('FINISHED')
            # Save data to file
            break
        data = np.column_stack((data, m))
This is the main code
data = np.empty([0,6])
manager = mp.Manager()
q = manager.Queue()
pool = mp.Pool(mp.cpu_count())
watcher = pool.apply_async(listener, (q,))
jobs = []
for i in range(100):
    job = pool.apply_async(process_data, (x1, x2, x3,))
    jobs.append(job)
for job in jobs:
    job.get()
q.put('kill')
pool.close()
process_data is a function that puts an array into the queue that the listener is then pulling from. The problem is that when running the code, print('FINISHED') is never executed, while data is being populated (so I know the listener is working). I am also getting an output warning:
FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
if m == 'kill':
Trying to debug this, I commented out the creation of the worker jobs as in the code below, and FINISHED actually prints and the warning message disappears. Why is this happening?
data = np.empty([0,6])
manager = mp.Manager()
q = manager.Queue()
pool = mp.Pool(mp.cpu_count())
watcher = pool.apply_async(listener, (q,))
jobs = []
#for i in range(100):
#    job = pool.apply_async(process_data, (x1, x2, x3,))
#    jobs.append(job)
#for job in jobs:
#    job.get()
q.put('kill')
pool.close()
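As an aside (my own note, not part of the thread): the FutureWarning comes from evaluating m == 'kill' while m is a numpy array, which numpy tries to turn into an elementwise comparison. A type check on the message sidesteps it; a minimal sketch:

import numpy as np

def is_kill_sentinel(m):
    # only compare against the string if m actually is a string;
    # numpy arrays never reach the == against 'kill'
    return isinstance(m, str) and m == 'kill'

print(is_kill_sentinel('kill'))            # True
print(is_kill_sentinel(np.zeros((3, 6))))  # False, and no elementwise comparison warning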
The following code counts all 750 joins and prints the results queue, but after it does that it gets stuck in a deadlock. If I assign results to multiprocessing.Queue(), the program deadlocks immediately.
def function(job, results):
    # do stuff
    results.put(stuff)
if __name__ == '__main__':
    devices = {}
    with open('file.txt', 'r') as f:
        projectFile = f.readlines()
    jobs = multiprocessing.Queue()
    results = multiprocessing.Manager().Queue()
    pool = [multiprocessing.Process(target=function, args=(jobs, results))
            for ip in itertools.islice(projectFile, 0, 750)]
    for p in pool:
        p.start()
    for n in projectFile:
        jobs.put(n.strip())
    for p in pool:
        jobs.put(None)
    count = 0
    for p in pool:
        p.join()
        count += 1
    print count
    print results
Does anyone see anything that could be causing the deadlocks? I am pretty unsure of how to proceed as it all seems to check out in my head. Any help would be appreciated!
I think this problem is caused by creating so many processes. It is not necessarily a deadlock; the program is simply taking a long time to start them all. I made a test with threads and it apparently ran much faster. Look at the code:
import multiprocessing
import itertools
import threading

def function(job, results):
    # do stuff
    results.put(stuff)

if __name__ == '__main__':
    devices = {}
    with open('file.txt', 'r') as f:
        projectFile = f.readlines()
    jobs = multiprocessing.Queue()
    results = multiprocessing.Manager().Queue()
    pool = [threading.Thread(target=function, args=(jobs, results))
            for ip in itertools.islice(projectFile, 0, 750)]
    for i, p in enumerate(pool):
        print "Started Thread Number", i  # Log to verify
        p.start()
    for n in projectFile:
        jobs.put(n.strip())
    for p in pool:
        jobs.put(None)
    count = 0
    for p in pool:
        p.join()  # This join is dangerous, make sure the thread does not raise any error
        count += 1
    print count
    print results
I don't know if this code will solve your problem, but it may execute faster.
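A related sketch (my own, not from that answer): if processes are preferred over threads, the start-up cost can also be kept down by capping the number of worker processes and letting them share the jobs queue, instead of starting one process per input line. The worker body and the sample input are illustrative.

import multiprocessing

def worker(jobs, results):
    while True:
        item = jobs.get()
        if item is None:                  # sentinel: no more work
            break
        results.put(item.upper())         # placeholder for the real "do stuff"

if __name__ == '__main__':
    jobs = multiprocessing.Queue()
    results = multiprocessing.Manager().Queue()
    n_workers = multiprocessing.cpu_count()
    pool = [multiprocessing.Process(target=worker, args=(jobs, results))
            for _ in range(n_workers)]
    for p in pool:
        p.start()
    for line in ['one', 'two', 'three']:  # stand-in for the projectFile lines
        jobs.put(line.strip())
    for _ in pool:
        jobs.put(None)
    for p in pool:
        p.join()
    while not results.empty():
        print(results.get())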
I am using the multiprocessing module, via the Process class, to do some tasks that are not CPU-bound, e.g. I/O or web requests. If the tasks take too long the CPU reaches 100% usage (all the workers are just waiting for data to return). I suspect an asynchronous execution solution would help, but I have never done anything like this. The code I am using is something like the following, where I have a huge list and each process works on a chunk.
Could you please make a suggestion in this direction?
Thanks in advance!!
import math
import multiprocessing
import urllib                      # Python 2 urllib; urllib.request.urlopen in Python 3
from multiprocessing import Process
from Queue import Empty            # queue.Empty in Python 3

def getData(urlsChunk, myQueue):
    for url in urlsChunk:
        fp = urllib.urlopen(url)
        try:
            data = fp.read()
            myQueue.put(data)
        finally:
            fp.close()
    return myQueue

manager = multiprocessing.Manager()
MYQUEUE = manager.Queue()
urls = ['a huge list of url items']
nprocs = multiprocessing.cpu_count()   # number of workers (not shown in the original snippet)
processes = []
chunksize = int(math.ceil(len(urls) / float(nprocs)))
for i in range(nprocs):
    p = Process(
        target=getData,  # This is my worker
        args=(urls[chunksize * i:chunksize * (i + 1)],
              MYQUEUE
              )
    )
    processes.append(p)
    p.start()
for p in processes:
    p.join()

while True:
    try:
        MYQUEUEelem = MYQUEUE.get(block=False)
    except Empty:
        break
    else:
        'do something with the MYQUEUEelem'
Using multiprocessing.Pool, your code can be simplified:
import multiprocessing
import urllib  # Python 2 urllib; urllib.request.urlopen in Python 3

def getData(url):
    fp = urllib.urlopen(url)
    try:
        return fp.read()
    finally:
        fp.close()

if __name__ == '__main__':  # should protect the "entry point" of the program
    urls = ['a huge list of url items']
    pool = multiprocessing.Pool()
    for result in pool.imap(getData, urls, chunksize=10):
        # do something with the result
        pass