Producer consumer with three threads each in Python

I'm trying to do a producer consumer program. I got it working just fine with one thread for each and I'm trying to modify it to run three threads of each. It appears that each of the consumer threads is trying to consume each released item.
import threading
import time
import random

# N is the number of slots in the buffer
N = 8
n = 0
i = 0
j = 0
# initialise buf with the right length, but without values
buf = N * [None]
free = threading.Semaphore(N)
items = threading.Semaphore(0)
block = threading.Semaphore(1)

# a function for the producer thread
def prod(n, j):
    while True:
        time.sleep(random.random())
        free.acquire()
        # produce a number and add it to the buffer
        buf[i] = n
        #print("produced")
        j = (j + 1) % N
        n += 1
        items.release()

# a function for the consumer thread
def cons(th):
    global i
    while True:
        time.sleep(random.random())
        # acquire items to allow the consumer to print.
        items.acquire()
        print(buf[i])
        print("consumed, th:{} i:{}".format(th, i))
        i = (i + 1) % N
        #time.sleep(3)
        free.release()

# a main function
def main():
    p1 = threading.Thread(target=prod, args=[n, j])
    p2 = threading.Thread(target=prod, args=[n, j])
    p3 = threading.Thread(target=prod, args=[n, j])
    c1 = threading.Thread(target=cons, args=[1])
    c2 = threading.Thread(target=cons, args=[2])
    c3 = threading.Thread(target=cons, args=[3])
    p1.start()
    p2.start()
    p3.start()
    c1.start()
    c2.start()
    c3.start()
    p1.join()
    p2.join()
    p3.join()
    c1.join()
    c2.join()
    c3.join()

main()
Any help is appreciated. I'm really at a loss with this one.

When a thread in your code acquires a semaphore, it should subsequently release that same semaphore. So instead of:
items.acquire()
...
free.release()
your code should do, e.g.:
items.acquire()
...
items.release()
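In that spirit, one way to stop every consumer from racing on the same buffer slot is to treat the otherwise unused block semaphore as a mutex that each consumer acquires and then releases itself. This is only a sketch against the asker's globals (buf, i, N, items, free, block), not the answer's literal code:

# Sketch only: reuses the asker's module-level globals from the question above.
def cons(th):
    global i
    while True:
        time.sleep(random.random())
        items.acquire()      # wait until at least one item exists
        block.acquire()      # take the mutex around the shared index...
        print(buf[i])
        print("consumed, th:{} i:{}".format(th, i))
        i = (i + 1) % N
        block.release()      # ...and release that same semaphore
        free.release()       # hand the freed slot back to the producers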


Share queue between processes

I am pretty new to multiprocessing in Python and am trying to achieve something that should be rather common, but I cannot find an easy way to do it when searching the web.
I want to put data in a queue and then make this queue available to different consumer functions. Of course, when getting an element from the queue, all consumer functions should get the same element. The following example should make clear what I want to achieve:
from multiprocessing import Process, Queue

def producer(q):
    for i in range(10):
        q.put(i)
    q.put(None)

def consumer1(q):
    while True:
        data = q.get()
        if data is None:
            break
        print(data)

def consumer2(q):
    while True:
        data = q.get()
        if data is None:
            break
        print(data)

def main():
    q = Queue()
    p1 = Process(target=producer, args=(q,))
    p2 = Process(target=consumer1, args=(q,))
    p3 = Process(target=consumer2, args=(q,))
    p1.start()
    p2.start()
    p3.start()
    p1.join()
    p2.join()
    p3.join()

if __name__ == '__main__':
    main()
Since the script does not terminate and I only get the print output of one function, I guess this is not the way to do it. I assume sharing a queue implies some things to consider? It works fine when using only one consumer function.
Appreciate the help!
If the values you are storing can be represented by one of the fundamental data types defined in the ctypes module, then the following could work. Here we are implementing a "queue" that can hold int values or None:
from multiprocessing import Process, Condition
import ctypes
from multiprocessing.sharedctypes import RawArray, RawValue
from threading import local
import time

my_local = local()
my_local.current = 0

class StructuredInt(ctypes.Structure):
    """
    This class is necessary because we want to be able to store in the RawArray
    either an int or None, which requires using ctypes.c_void_p as the array type.
    But, unfortunately, ctypes.c_void_p(0) is interpreted as None.
    So we need a way to represent 0. Field 'value' is the
    actual int value being stored and we use an arbitrary 'ptr'
    field value that will not be interpreted as None.
    To store a None value, we set 'ptr' to ctypes.c_void_p(None) and field
    'value' is irrelevant.
    To store an integer, we set 'ptr' to ctypes.c_void_p(1) and field
    'value' has the actual value.
    """
    _fields_ = [('ptr', ctypes.c_void_p), ('value', ctypes.c_int)]

class MultiIntQueue:
    """
    An integer queue that can be processed by multiple threads where each thread
    can retrieve all the values added to the queue.

    :param maxsize: The maximum queue capacity (defaults to 20 if specified as None)
    :type maxsize: int
    """
    def __init__(self, maxsize=None):
        if maxsize is None:
            maxsize = 20
        self.maxsize = maxsize
        self.q = RawArray(StructuredInt, maxsize)
        self.condition = Condition()
        self.size = RawValue(ctypes.c_int, 0)

    def get(self):
        with self.condition:
            while my_local.current >= self.size.value:
                self.condition.wait()
            i = self.q[my_local.current]
            my_local.current += 1
            return None if i.ptr is None else i.value

    def put(self, i):
        assert 0 <= self.size.value < self.maxsize
        with self.condition:
            self.q[self.size.value] = (ctypes.c_void_p(None), 0) if i is None else (ctypes.c_void_p(1), i)
            self.size.value += 1
            self.condition.notify_all()

def producer(q):
    for i in range(10):
        q.put(i)
        time.sleep(.3)  # simulate processing
    q.put(None)

def consumer1(q):
    while True:
        data = q.get()
        if data is None:
            break
        time.sleep(.1)  # simulate processing
        print('Consumer 1:', data)

def consumer2(q):
    while True:
        data = q.get()
        if data is None:
            break
        time.sleep(.1)  # simulate processing
        print('Consumer 2:', data)

def main():
    q = MultiIntQueue()
    p1 = Process(target=producer, args=(q,))
    p2 = Process(target=consumer1, args=(q,))
    p3 = Process(target=consumer2, args=(q,))
    p1.start()
    p2.start()
    p3.start()
    p1.join()
    p2.join()
    p3.join()

if __name__ == '__main__':
    main()
Prints:
Consumer 1: 0
Consumer 2: 0
Consumer 2: 1
Consumer 1: 1
Consumer 2: 2
Consumer 1: 2
Consumer 2: 3
Consumer 1: 3
Consumer 2: 4
Consumer 1: 4
Consumer 1: 5
Consumer 2: 5
Consumer 1: 6
Consumer 2: 6
Consumer 1: 7
Consumer 2: 7
Consumer 2: 8
Consumer 1: 8
Consumer 1: 9
Consumer 2: 9
Your question exemplifies a misunderstanding:
"all consumer functions should get the same element"
That's just not how queues work. Queues are automatically managed (there's quite a lot under the hood) such that if one item is put in, only one item can be taken out. That item is not duplicated to all consumers. It seems like you actually need two separate queues to guarantee that each consumer gets each input without competing against the other consumer:
from multiprocessing import Process, Queue

def producer(q1, q2):
    for i in range(10):
        q1.put(i)
        q2.put(i)
    q1.put(None)
    q2.put(None)

def consumer1(q):
    while True:
        data = q.get()
        if data is None:
            break
        print(data)

def consumer2(q):
    while True:
        data = q.get()
        if data is None:
            break
        print(data)

def main():
    q1 = Queue()
    q2 = Queue()
    p1 = Process(target=producer, args=(q1, q2))
    p2 = Process(target=consumer1, args=(q1,))
    p3 = Process(target=consumer2, args=(q2,))
    p1.start()
    p2.start()
    p3.start()
    p1.join()
    p2.join()
    p3.join()

if __name__ == '__main__':
    main()
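If more consumers are added later, the same idea generalizes to one queue per consumer. A minimal sketch of that fan-out, not part of the original answer, using a single generic consumer function instead of consumer1/consumer2:

from multiprocessing import Process, Queue

# Illustrative sketch: broadcast every produced item to one queue per consumer.
def producer(queues):
    for i in range(10):
        for q in queues:
            q.put(i)
    for q in queues:
        q.put(None)              # one sentinel per consumer

def consumer(name, q):
    while True:
        data = q.get()
        if data is None:
            break
        print(name, data)

def main():
    queues = [Queue() for _ in range(3)]          # one queue per consumer
    procs = [Process(target=producer, args=(queues,))]
    procs += [Process(target=consumer, args=('consumer{}'.format(n), q))
              for n, q in enumerate(queues, 1)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()

if __name__ == '__main__':
    main()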

Python Multiprocessing shared variables erratic behavior

The following simple code should, as far as I can see, always print out '0' at the end. However, when running it with "lock = True", it often prints out other positive or negative numbers.
import multiprocessing as mp
import sys
import time

num = mp.Value('d', 0.0, lock=False)

def func1():
    global num
    print('start func1')
    #while num.value < 100000:
    for x in range(1000):
        num.value += 1
        #print(num.value)
    print('end func1')

def func2():
    global num
    print('start func2')
    #while num.value > -10000:
    for x in range(1000):
        num.value -= 1
        #print(num.value)
    print('end func2')

if __name__ == '__main__':
    ctx = mp.get_context('fork')
    p1 = ctx.Process(target=func1)
    p1.start()
    p2 = ctx.Process(target=func2)
    p2.start()
    p1.join()
    p2.join()
    sys.stdout.flush()
    time.sleep(25)
    print(num.value)
Can anyone offer any explanation?
To clarify: when lock is set to "False", it behaves as expected and prints '0'; however, when it is "True" it often does not.
This is more noticeable and happens more often for larger values of 'range'.
Tested this on two platforms (Mac OS X and Ubuntu 14.04.01), both with Python 3.6.
The docs for multiprocessing.Value are very explicit about this:
Operations like += which involve a read and write are not atomic. So if, for instance, you want to atomically increment a shared value it is insufficient to just do
counter.value += 1
Assuming the associated lock is recursive (which it is by default) you can instead do
with counter.get_lock():
    counter.value += 1
Regarding your comment: this is not "1000 incrementations". It is 1000 iterations of:
# Take lock on num.value
temp_value = num.value # (1)
# release lock on num.value (anything can modify it now)
temp_value += 1 # (2)
# Take lock on num.value
num.value = temp_value # (3)
# release lock on num.value
That's what it means when it says += is not atomic.
If num.value is modified by another process during line 2, then line 3 will write the wrong value to num.value.
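For completeness, here is a minimal sketch (not from the original answer) of the asker's two loops with the read-modify-write made atomic by holding the Value's own lock; it assumes num is created with the default lock=True:

import multiprocessing as mp

num = mp.Value('d', 0.0)         # default lock=True

def func1():
    for x in range(1000):
        with num.get_lock():     # hold the lock across read, add, write
            num.value += 1

def func2():
    for x in range(1000):
        with num.get_lock():
            num.value -= 1

if __name__ == '__main__':
    ctx = mp.get_context('fork')
    p1 = ctx.Process(target=func1)
    p2 = ctx.Process(target=func2)
    p1.start(); p2.start()
    p1.join(); p2.join()
    print(num.value)             # 0.0 every time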
To give an example of a better way to approach what you're doing, here's a version using Queues that ensures everything stays tick-tock in lock step:
import multiprocessing as mp
import queue
import sys

# An increment process. Takes a value, increments it, passes it along.
def func1(in_queue: mp.Queue, out_queue: mp.Queue):
    print('start func1')
    for x in range(1000):
        n = in_queue.get()
        n += 1
        print("inc", n)
        out_queue.put(n)
    print('end func1')

# A decrement process. Takes a value, decrements it, passes it along.
def func2(in_queue: mp.Queue, out_queue: mp.Queue):
    print('start func2')
    for x in range(1000):
        n = in_queue.get()
        n -= 1
        print("dec", n)
        out_queue.put(n)
    print('end func2')

if __name__ == '__main__':
    ctx = mp.get_context('fork')
    queue1 = mp.Queue()
    queue2 = mp.Queue()

    # Make two processes and tie their queues back to back. They hand a value
    # back and forth until they've run their course.
    p1 = ctx.Process(target=func1, args=(queue1, queue2,))
    p1.start()
    p2 = ctx.Process(target=func2, args=(queue2, queue1,))
    p2.start()

    # Get it started.
    queue1.put(0)

    # Wait for them to finish.
    p1.join()
    p2.join()

    # Since this is a looping process, the result is on the queue we put() to.
    # (Using block=False because I'd rather throw an exception if something
    # went wrong rather than deadlock.)
    num = queue1.get(block=False)
    print("FINAL=%d" % num)
This is a very simplistic example. In more robust code you need to think about what happens in failure cases. For example, if p1 throws an exception, p2 will deadlock waiting for its value. In many ways that's a good thing since it means you can recover the system by starting a new p1 process with the same queues. This way of dealing with concurrency is called the Actor model if you want to study it further.

Cannot obtain values while parallelizing 2 for loops

I am trying to run the following snippet which appends data to lists 'tests1' and 'tests2'. But when I print 'tests1' and 'tests2', the displayed list is empty. Anything incorrect here?
tests1 = []
tests2 = []

def func1():
    for i in range(25, 26):
        tests1.append(test_loader.get_tests(test_prefix=new_paths[i], tags=params.get('tags', None),
                                            exclude=params.get('exclude', False)))

def func2():
    for i in range(26, 27):
        tests2.append(test_loader.get_tests(test_prefix=new_paths[i], tags=params.get('tags', None),
                                            exclude=params.get('exclude', False)))

p1 = mp.Process(target=func1)
p2 = mp.Process(target=func2)
p1.start()
p2.start()
p1.join()
p2.join()
print tests1
print tests2
The worker processes don't actually share the same object. It gets copied (pickled).
You can send values between processes using a multiprocessing.Queue (or by various other means). See my simple example (in which I've made your tests into integers for simplicity).
from multiprocessing import Process, Queue

def add_tests1(queue):
    for i in range(10):
        queue.put(i)
    queue.put(None)

def add_tests2(queue):
    for i in range(100, 110):
        queue.put(i)
    queue.put(None)

def run_tests(queue):
    while True:
        test = queue.get()
        if test is None:
            break
        print test

if __name__ == '__main__':
    queue1 = Queue()
    queue2 = Queue()

    add_1 = Process(target=add_tests1, args=(queue1,))
    add_2 = Process(target=add_tests2, args=(queue2,))
    run_1 = Process(target=run_tests, args=(queue1,))
    run_2 = Process(target=run_tests, args=(queue2,))

    add_1.start(); add_2.start(); run_1.start(); run_2.start()
    add_1.join(); add_2.join(); run_1.join(); run_2.join()
Note that the parent program can also access the queues.
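As one of those "various other means": a managed list is shared by proxy, so appends made in the worker processes are visible to the parent after join(). A minimal sketch under the same simplification (integers instead of real tests), not part of the original answer:

from multiprocessing import Manager, Process

# Illustrative sketch: Manager-backed lists are proxies, so worker appends
# are sent back to the manager process and visible to the parent.
def func1(tests1):
    for i in range(10):
        tests1.append(i)

def func2(tests2):
    for i in range(100, 110):
        tests2.append(i)

if __name__ == '__main__':
    manager = Manager()
    tests1 = manager.list()
    tests2 = manager.list()
    p1 = Process(target=func1, args=(tests1,))
    p2 = Process(target=func2, args=(tests2,))
    p1.start(); p2.start()
    p1.join(); p2.join()
    print(list(tests1))
    print(list(tests2))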

Producers and Consumers - Multiple Threads in Python

So I've got this code for Producers and Consumers;
import threading
import time
import random

N = 8
buffer = N * [None]
free = threading.Semaphore(N)
items = threading.Semaphore(0)

def prod():
    n = 0
    i = 0
    while True:
        time.sleep(random.random())
        free.acquire()
        buffer[i] = n
        i = (i + 1) % N
        n += 1
        items.release()

def cons():
    i = 0
    while True:
        time.sleep(random.random())
        items.acquire()
        print(buffer[i])
        i = (i + 1) % N
        free.release()

def main():
    p = threading.Thread(target=prod, args=[])
    c = threading.Thread(target=cons, args=[])
    p.start()
    c.start()
    p.join()
    c.join()

main()
But I want to be able to have three threads each for the producer and consumer. Can someone suggest a way I could do this using a third semaphore? Thanks.
Assuming this is not homework about semaphores and you want a real solution, you should use the Queue object, which can handle all of this by itself. If I understood correctly, you want three producers and three consumers that share one buffer holding at most 8 items. If that's the case, the code can be simplified to something like this:
import threading
import time
import random
import Queue

def prod(queue):
    n = 0
    while True:
        time.sleep(random.random())
        queue.put(n)
        n += 1

def cons(queue):
    while True:
        time.sleep(random.random())
        n = queue.get()
        print n

def main():
    N = 8
    queue = Queue.Queue(N)
    threads = []
    for i in range(3):
        threads.append(threading.Thread(target=cons, args=[queue]))
        threads.append(threading.Thread(target=prod, args=[queue]))
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()  # this will never really finish, because the threads run forever

main()
If you are interested in how the queue is implemented internally, you can look at the queue module's source code in the standard library.
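On the "never really finish" note: if the producers are given a fixed amount of work, one sentinel per consumer lets everything shut down cleanly. A minimal sketch of that variation (not part of the original answer), written against Python 3's queue module:

import queue
import random
import threading
import time

# Illustrative variation: each producer makes a fixed number of items, and the
# main thread adds one sentinel (None) per consumer so the consumers can exit.
def prod(q, count):
    for n in range(count):
        time.sleep(random.random())
        q.put(n)

def cons(q):
    while True:
        n = q.get()
        if n is None:            # sentinel: no more items will arrive
            break
        time.sleep(random.random())
        print(n)

def main():
    q = queue.Queue(8)
    producers = [threading.Thread(target=prod, args=(q, 5)) for _ in range(3)]
    consumers = [threading.Thread(target=cons, args=(q,)) for _ in range(3)]
    for t in producers + consumers:
        t.start()
    for t in producers:
        t.join()
    for _ in consumers:
        q.put(None)              # one sentinel per consumer
    for t in consumers:
        t.join()

main()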

Use all CPU cores in a Python script for one process

I want to use all CPU cores in a Python script.
I found some code like this:
from multiprocessing import Process, Queue

def do_sum():
    min = 0
    max = 100000000
    while min < max:
        min += 1
        file = open('mytext.txt', 'a')
        file.write(str(min))

def main():
    q = Queue()
    p1 = Process(target=do_sum)
    p2 = Process(target=do_sum)
    p1.start()
    p2.start()
    r1 = q.get()
    r2 = q.get()
    print r1 + r2

if __name__ == '__main__':
    main()
But it does not make the CPUs work together: p1 starts writing 1, 2, 3, 4, 5... and p2 does not continue from there; it also starts from the beginning with 1, 2, 3, 4, so the result is 1122334455.
How can I make the two CPU cores work together?
I want to write the file as fast as my PC can. I have an i7 CPU; how can I use all of its cores?
You need a locking mechanism (see http://en.wikipedia.org/wiki/Lock_%28computer_science%29) and shared references for (min, max) rather than local copies.
The multiprocessing library already has a Lock() object to avoid overwriting and a Value() object to share state between several processes.
from multiprocessing import Process, Lock, Value

def do_sum(id, counter, lock):
    MAX = 50
    while counter.value < MAX:
        lock.acquire()
        counter.value += 1
        file = open('mytext.txt', 'a')
        file.write(str(counter.value))
        file.write("\n")
        file.close()
        lock.release()

def main():
    counter = Value('d', 0.0)
    lock = Lock()
    #f = open('mytext.txt','w')
    #f.close()
    p1 = Process(target=do_sum, args=(0, counter, lock,))
    p2 = Process(target=do_sum, args=(1, counter, lock,))
    p1.start()
    p2.start()
    # wait for both workers to finish instead of reading from an unused queue
    p1.join()
    p2.join()

if __name__ == '__main__':
    main()
Anyway, you can harness as much CPU power as you like; the performance bottleneck of this algorithm is in the I/O operations (which are inherently sequential).
