I'm trying to implement mutual exclusion using a semaphore in Python. The two functions (proc1, proc2) are supposed to act as two independent, concurrent processes. They do exactly the same thing: store n in array[n], then increment n.
The purpose of the program is to show that with a semaphore we can ensure the array is filled properly, as [0,1,2,3,4,5,6,7,8,9], without skipping any index. However, my code seems to store [0,1,0,0,0,0,0,0,0,0]. I haven't used threads in Python before, so I don't know what's going on.
import threading
import time

n = 0
array = [0]*10
sem = threading.Semaphore()

def proc1():
    global n, array
    while True:
        sem.acquire()
        array[n] = n
        n += 1
        sem.release()
        time.sleep(0.25)

def proc2():
    global n, array
    while True:
        sem.acquire()
        array[n] = n
        n += 1
        sem.release()
        time.sleep(0.25)

t = threading.Thread(target=proc1)
t.start()
t2 = threading.Thread(target=proc2)
t2.start()
print(array)
The problem was that the OP tried to print the result before the threads were done; they should have waited for the threads with join(). (The original loops also never terminate, so the version below stops once n passes 9 and prints only after both joins.)
import threading
import time

n = 0
array = [0]*10
sem = threading.Semaphore()

def proc(num):
    global n
    while True:
        sem.acquire()
        if n > 9:
            sem.release()
            break
        # Write and increment inside the critical section, so no other
        # thread can change n between the two operations.
        array[n] = n
        n += 1
        sem.release()
        print("Thread {}: {}".format(num, array))
        time.sleep(0.25)

t1 = threading.Thread(target=proc, args=[1])
t2 = threading.Thread(target=proc, args=[2])
t1.start()
t2.start()
t1.join()
t2.join()
print(array)
A different take on a semaphore pattern, handling the "tasks" within the Semaphore class itself:
import logging
import time
from dataclasses import dataclass
from threading import Thread
from typing import Callable

logger = logging.getLogger(__name__)

@dataclass
class Task:
    task_id: int
    func: Callable
    args: tuple

class Semaphore:
    def __init__(self, max_threads):
        self.active_threads = 0
        self.max_threads = max_threads
        self.tasks = []

    def add_task(self, func, args):
        self.tasks.append(
            Task(
                task_id=len(self.tasks),
                func=func,
                args=args
            )
        )

    def run_task(self, task: Task):
        # The counter updates below are not locked; this sketch leans on
        # CPython's GIL and is only approximately accurate.
        self.active_threads += 1
        task.func(*task.args)
        self.active_threads -= 1

    def run(self, blocking=False):
        if blocking:
            self._run()
        else:
            t = Thread(target=self._run)
            t.start()

    def _run(self):
        # Start queued tasks while respecting the max_threads cap.
        while self.tasks:
            if self.active_threads < self.max_threads:
                task = self.tasks.pop()
                logger.info(f'starting task: {task.task_id}')
                t = Thread(
                    target=self.run_task,
                    args=(task,))
                t.start()
            else:
                time.sleep(0.01)  # brief nap instead of a busy spin
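A minimal usage sketch (my addition, not from the original post; work_item is a made-up stand-in for real work):

def work_item(name, delay):
    time.sleep(delay)  # pretend to do real work
    print(name, 'done')

runner = Semaphore(max_threads=2)  # at most two tasks in flight
for k in range(5):
    runner.add_task(work_item, ('job-%d' % k, 0.1))
runner.run(blocking=True)  # returns once the task list is drained (the last tasks may still be finishing)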
I have written a bit of code to observe a race condition, but it doesn't happen.
from threading import Thread
from time import sleep

class SharedContent:
    def __init__(self, initia_value = 0) -> None:
        self.initial_value = initia_value

    def incerease(self, delta = 1):
        sleep(1)
        self.initial_value += delta

content = SharedContent(0)
threads: list[Thread] = []
for i in range(250):
    t = Thread(target=content.incerease)
    t.start()
    threads.append(t)

# wait until all threads have finished their job
while True:
    n = 0
    for t in threads:
        if t.is_alive():
            sleep(0.2)
            continue
        n += 1
    if n == len(threads):
        break

print(content.initial_value)
The output is 250, which implies that no race condition happened!
Why is that?
I even tried this with random sleep times, but the output was the same.
I changed your program. This version prints a different number every time I run it.
#!/usr/bin/env python3
from threading import Thread

class SharedContent:
    def __init__(self, initia_value = 0) -> None:
        self.initial_value = initia_value

    def incerease(self, delta = 1):
        for i in range(0, 1000000):
            self.initial_value += delta

content = SharedContent(0)
threads = []
for i in range(2):
    t = Thread(target=content.incerease)
    t.start()
    threads.append(t)

# wait until all threads have finished their job
for t in threads:
    t.join()

print(content.initial_value)
What I changed:
Only two threads instead of 250.
Got rid of the sleep() calls.
Each thread increments the variable one million times instead of just once.
The main program uses join() to wait for the threads to finish.
With millions of unsynchronized += operations per thread, a thread switch is very likely to land between the read and the write of initial_value, so updates get lost. In your version each thread performs a single increment after a long sleep, so the window for a lost update is vanishingly small.
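For completeness, a minimal sketch (my addition, not part of the original answer) of eliminating the race with threading.Lock:

#!/usr/bin/env python3
from threading import Thread, Lock

class SharedContent:
    def __init__(self, initial_value=0) -> None:
        self.initial_value = initial_value
        self.lock = Lock()  # guards initial_value

    def increase(self, delta=1):
        for _ in range(1000000):
            with self.lock:  # makes the read-modify-write atomic
                self.initial_value += delta

content = SharedContent(0)
threads = [Thread(target=content.increase) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(content.initial_value)  # always 2000000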
# Version 1
main_df = pd.read_csv('Million_rows.csv')

def myfunction(args, start, end):
    for i in range(start, end):
        noLuck = True
        if condition1:
            for item in mainTreeSearch:
                ...
                lock.acquire()
                ### write to main_df
                lock.release()
                noLuck = False
                break
        if noLuck and Acondition:
            lock.acquire()
            ### write to main_df
            lock.release()
        elif ...:
            ... various asymmetric decision trees ...

t1 = Thread(target=myfunction, args=(args, 0, 250))
t2 = Thread(target=myfunction, args=(args, 250, 500))
t3 = Thread(target=myfunction, args=(args, 500, 750))
t4 = Thread(target=myfunction, args=(args, 750, 1000))
My problem is that I don't know how to feed the threads the rest of the rows; I have tried Queue, unsuccessfully.
# Version 2
def myfunction(args, q):
    while True:
        i = q.get()
        ... same search as above, without locking ...
        q.task_done()

q = Queue(maxsize=0)
num_threads = 5
threads = []
for i in range(num_threads):
    worker = Thread(target=myfunction, args=(args, q))
    worker.setDaemon(True)
    threads.append(worker)
    worker.start()

for x in range(1000):
    # time.sleep(.005)
    q.put(x)

q.join()
In version 2, without the sleep, either one thread hogs all the data or random crashes happen.
In version 1, should I use the threading.Condition.notify() mechanism, and if so, how is it implemented?
I reformatted it to this and it works as expected:
from Queue import Queue
import threading

q = Queue()

def myfunction(q):
    while True:
        val = q.get()
        print('\n' + str(threading.currentThread()))
        print('\n' + str(val))
        q.task_done()

num_threads = 5
threads = []
for i in range(num_threads):
    worker = threading.Thread(target=myfunction, args=(q,))
    worker.setDaemon(True)
    threads.append(worker)
    worker.start()

for x in range(1000):
    q.put(x)

q.join()
Check it out. I think the way you are passing the parameters is wrong; the worker threads need the queue passed explicitly, as in args=(q,).
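For reference (my addition, not part of the original answer), the same pattern in Python 3, where the module is named queue and daemon can be passed to the Thread constructor:

import queue
import threading

q = queue.Queue()

def myfunction(q):
    while True:
        val = q.get()
        print(threading.current_thread().name, val)
        q.task_done()

for i in range(5):
    threading.Thread(target=myfunction, args=(q,), daemon=True).start()

for x in range(1000):
    q.put(x)
q.join()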
I modified the example on joinable queues from this link https://pymotw.com/2/multiprocessing/communication.html to run a function I wrote instead of a Task object. The modified code is listed below. The problem I am getting is that the consumers get poisoned without None ever being put in the tasks queue: they exit before completing the tasks. So I removed the None check from the run function (commented out below), and I caught this exception:
'NoneType' object is not callable
I am sure that None has not been passed yet, since the message "Poisoning Consumers" has not yet been printed.
import multiprocessing as mp
import MyLib

# Subclass of Process
class Consumer(mp.Process):

    def __init__(self, task_queue, result_queue):
        mp.Process.__init__(self)
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.daemon = True

    # A method that defines the behavior of the process
    def run(self):
        proc_name = self.name
        while True:
            try:
                next_task = self.task_queue.get()
                # if next_task is None:
                #     # Poison pill means shutdown
                #     print('%s: Exiting' % proc_name)
                #     self.task_queue.task_done()
                #     break
                mxR, disC = next_task()
                self.task_queue.task_done()
                self.result_queue.put((mxR, disC))
            except Exception as e:
                print(e)
                return
if __name__ == '__main__':
    # Establish communication queues
    tasks = mp.JoinableQueue()
    results = mp.Queue()

    # Start consumers
    num_consumers = mp.cpu_count() * 2
    print('Creating %d consumers' % num_consumers)
    consumers = [Consumer(tasks, results) for i in range(num_consumers)]
    for w in consumers:
        w.start()

    # Enqueue jobs
    trials = 10
    Tx_Range = 50
    prnts = 4
    for tx in list(range(30, 200, 20)):
        file_name = 'output_{}_{}.txt'.format(tx, prnts)
        output_file = open(file_name, 'a')
        output_file.write('Nodes\tTx_Range\tAvg_Rings\tAvg_Disc\n')
        for n in list(range(50, 101, 50)):
            ring_sum, disc_sum = 0, 0
            for i in range(0, trials):
                tasks.put(MyLib.GBMR_mp(1000, 1000, n, prnts, tx, False, results))
            print('Done putting jobs')
            for i in range(0, trials):
                mxR, discN = results.get()
                ring_sum += mxR
                disc_sum += discN
            avg_ring = ring_sum/trials
            avg_disc = disc_sum/trials
            print('Done Collecting Results, avg_disc = ', avg_disc, ' and avg_rings = ', avg_ring)
            s = '{}\t\t{}\t\t{}\t\t{}\n'.format(n, tx, avg_ring, avg_disc)
            print('Nodes', n, 'is Done for Tx_range', tx)
            output_file.write(s)
        output_file.close()

    # Add a poison pill for each consumer
    print('Poisoning Consumers')
    for i in range(num_consumers):
        tasks.put(None)

    # Wait for all of the tasks to finish
    tasks.join()
What could be the cause of this problem? Could it be that queue.get() is returning None?
Thanks in advance.
Since I did not get any suggestions, I tried to solve the problem myself and came up with a solution (a bad one, I believe): I simply forgot about consumers and started each function call in its own Process, keeping the number of concurrent processes limited by counting the started ones, as shown below. But I am sure I am doing something wrong, because the performance of this solution is much worse than not using multiprocessing at all: with multiprocessing the inner for loop on "n" takes about 2 minutes, while without multiprocessing it takes a few seconds. I am still a noob; can anyone point me in the right direction? Here is the code:
import multiprocessing as mp
import MyLib

if __name__ == '__main__':
    results = mp.Queue()
    num_consumers = mp.cpu_count()
    trials = 500
    prnts = 4
    num_of_proc = 0
    consumers = []
    joined = 0
    for tx in list(range(30, 200, 20)):
        file_name = 'Centered_BS_output_{}_{}.txt'.format(tx, prnts)
        output_file = open(file_name, 'a')
        output_file.write('Nodes\tTx_Range\tAvg_Rings\tAvg_Disc\n')
        for n in list(range(30, 1030, 30)):
            consumers.clear()
            ring_sum, disc_sum, joined, i, num_of_proc = 0, 0, 0, 0, 0
            # for i in range(0, trials):
            while i < trials:
                if num_of_proc < num_consumers:
                    consumers.append(mp.Process(target=MyLib.GBMR_mp, args=(1000, 1000, n, prnts, tx, False, results)))
                    consumers[i].daemon = True
                    consumers[i].start()
                    num_of_proc += 1
                    i += 1
                else:
                    consumers[joined].join()
                    num_of_proc -= 1
                    joined += 1
            print('Done putting jobs')
            for i in range(0, trials):
                mxR, discN = results.get()
                ring_sum += mxR
                disc_sum += discN
            avg_ring = ring_sum/trials
            avg_disc = disc_sum/trials
            print('Done Collecting Results, avg_disc = ', avg_disc, ' and avg_rings = ', avg_ring)
            s = '{}\t\t{}\t\t{}\t\t{}\n'.format(n, tx, avg_ring, avg_disc)
            print('Nodes', n, 'is Done for Tx_range', tx)
            output_file.write(s)
        output_file.close()
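For reference, a likely cause of the original "'NoneType' object is not callable" error (my reading, not a confirmed answer): tasks.put(MyLib.GBMR_mp(1000, ...)) calls GBMR_mp immediately in the parent process and puts its return value, presumably None, on the queue, so the consumer's next_task() call fails. A minimal sketch of enqueueing the call itself, assuming GBMR_mp returns the (mxR, disC) pair when invoked:

import functools

# Enqueue a callable, not the result of calling the function:
for i in range(0, trials):
    tasks.put(functools.partial(MyLib.GBMR_mp, 1000, 1000, n, prnts, tx, False, results))

With that change the None check in Consumer.run can be restored and the poison pills work as in the PyMOTW example. A multiprocessing.Pool would also replace most of this hand-rolled process management.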
So I've got this code for producers and consumers:
import threading
import time
import random

N = 8
buffer = N * [None]
free = threading.Semaphore(N)
items = threading.Semaphore(0)

def prod():
    n = 0
    i = 0
    while True:
        time.sleep(random.random())
        free.acquire()
        buffer[i] = n
        i = (i + 1) % N
        n += 1
        items.release()

def cons():
    i = 0
    while True:
        time.sleep(random.random())
        items.acquire()
        print(buffer[i])
        i = (i + 1) % N
        free.release()

def main():
    p = threading.Thread(target=prod, args=[])
    c = threading.Thread(target=cons, args=[])
    p.start()
    c.start()
    p.join()
    c.join()

main()
But I want to be able to have three threads each for the producer and consumer. Can someone suggest a way I could do this using a third semaphore? Thanks.
Assuming this is not homework about semaphores and you want a real solution, you should use the Queue object, which handles all of this by itself. If I understood correctly, you want three producers and three consumers sharing one buffer that can hold at most 8 items. If that's the case, the code can be simplified to something like this:
import threading
import time
import random
import Queue

def prod(queue):
    n = 0
    while True:
        time.sleep(random.random())
        queue.put(n)
        n += 1

def cons(queue):
    while True:
        time.sleep(random.random())
        n = queue.get()
        print n

def main():
    N = 8
    queue = Queue.Queue(N)
    threads = []
    for i in range(3):
        threads.append(threading.Thread(target=cons, args=[queue]))
        threads.append(threading.Thread(target=prod, args=[queue]))
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()  # this will never really finish, because the threads run forever
If you are interested in how the queue is implemented internally, you can see the source code here.
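If the exercise does call for a third semaphore, a common pattern (my sketch, not from the original answer) keeps free and items and adds a mutex semaphore that guards the shared buffer indices, so three producers and three consumers can run safely:

import threading
import time
import random

N = 8
buffer = N * [None]
free = threading.Semaphore(N)   # counts empty slots
items = threading.Semaphore(0)  # counts filled slots
mutex = threading.Semaphore(1)  # the third semaphore: guards buffer and the indices

in_i, out_i, n = 0, 0, 0

def prod():
    global in_i, n
    while True:
        time.sleep(random.random())
        free.acquire()
        with mutex:  # only one thread touches the shared indices at a time
            buffer[in_i] = n
            in_i = (in_i + 1) % N
            n += 1
        items.release()

def cons():
    global out_i
    while True:
        time.sleep(random.random())
        items.acquire()
        with mutex:
            val = buffer[out_i]
            out_i = (out_i + 1) % N
        free.release()
        print(val)

# Like the original, these threads run forever.
threads = [threading.Thread(target=prod) for _ in range(3)] + \
          [threading.Thread(target=cons) for _ in range(3)]
for t in threads:
    t.start()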
I'm going through the "Little Book of Semaphores" right now, and I'm having a problem with the first barrier problem. In the code below, I'm trying to have 3 threads rendezvous before continuing. This part works fine - I always get 3 "before"s pushed to the queue. However, I don't always get 3 "after"s pushed to the queue. Sometimes I do, but not always. What am I doing wrong?
import threading
import random
import Queue
import time

num_loops = 1

class myThread(threading.Thread):
    def __init__(self, id, count, n, q, locks):
        threading.Thread.__init__(self)
        self.id = id
        self.q = q
        self.n = n
        self.locks = locks
        self.count = count
        return

    def run(self):
        time.sleep(random.random()/100)
        self.q.put("before")
        with self.locks['mutex']:
            self.count[0] += 1
            if self.count[0] == self.n:
                self.locks['barrier'].release()
        self.locks['barrier'].acquire()
        self.locks['barrier'].release()
        time.sleep(random.random()/100)
        self.q.put("after")
if __name__ == '__main__':
    total = 10
    incorrect = 0
    num_threads = 3
    for _ in range(total):
        q = Queue.Queue()
        locks = {'mutex': threading.Semaphore(1),
                 'barrier': threading.Semaphore(0),
                 }
        threads = []
        count = [0]
        for i in range(num_threads):
            t = myThread(i, count, num_threads, q, locks)
            t.start()
            threads.append(t)
        for i in threads:
            t.join()
            print "join"
        one_loop = ['before']*num_threads + ['after']*num_threads
        total_loop = one_loop * num_loops
        result = []
        while not q.empty():
            result.append(q.get())
        print result
        if result != total_loop:
            incorrect += 1
    print "%s out of %s is wrong" % (incorrect, total)
I found the problem: you do not join all the threads. The lines:

for i in threads:
    t.join()
    print "join"

should be:

for i in threads:
    i.join()  # changed line
    print "join"
Joining t first just waits for the last thread created; in the remaining iterations it is a no-op, so the earlier threads may still be running when you read the queue.
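As an aside (my addition): outside of the semaphore exercise, Python 3.2+ ships threading.Barrier, which packages this rendezvous pattern up directly:

import threading

barrier = threading.Barrier(3)  # rendezvous point for three threads

def worker(q):
    q.put("before")
    barrier.wait()  # blocks until all three threads have arrived
    q.put("after")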