I'm going through the "Little Book of Semaphores" right now, and I'm having a problem with the first Barrier problem. In the code below, I'm trying to have 3 threads rendezvous before continuing. That part works fine: I always get 3 "before"s pushed to the queue. However, I don't always get 3 "after"s pushed to the queue. Sometimes I do, but not always. What am I doing wrong?
import threading
import random
import Queue
import time

num_loops = 1

class myThread(threading.Thread):
    def __init__(self, id, count, n, q, locks):
        threading.Thread.__init__(self)
        self.id = id
        self.q = q
        self.n = n
        self.locks = locks
        self.count = count
        return

    def run(self):
        time.sleep(random.random()/100)
        self.q.put("before")
        with self.locks['mutex']:
            self.count[0] += 1
            if self.count[0] == self.n:
                locks['barrier'].release()
        locks['barrier'].acquire()
        locks['barrier'].release()
        time.sleep(random.random()/100)
        self.q.put("after")
if __name__ == '__main__':
    total = 10
    incorrect = 0
    num_threads = 3

    for _ in range(total):
        q = Queue.Queue()
        locks = {'mutex': threading.Semaphore(1),
                 'barrier': threading.Semaphore(0),
                 }
        threads = []
        count = [0]

        for i in range(num_threads):
            t = myThread(i, count, num_threads, q, locks)
            t.start()
            threads.append(t)

        for i in threads:
            t.join()
            print "join"

        one_loop = ['before']*num_threads + ['after']*num_threads
        total_loop = one_loop * num_loops
        result = []
        while not q.empty():
            result.append(q.get())
        print result

        if result != total_loop:
            incorrect += 1

    print "%s out of %s is wrong" % (incorrect, total)
I found the problem. You do not join all the threads. These lines:

for i in threads:
    t.join()
    print "join"
Should be:

for i in threads:
    i.join() # changed line
    print "join"
Joining t first just waits for the last thread created; in the rest of the iterations it is a no-op, because t never changes inside the loop.
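As an aside, Python 3's standard library has a ready-made primitive for this rendezvous. Here is a minimal sketch using threading.Barrier (my addition, not part of the original question, which deliberately builds the barrier from semaphores):

import threading
import queue

num_threads = 3
barrier = threading.Barrier(num_threads)  # trips once num_threads threads have called wait()
q = queue.Queue()

def worker():
    q.put("before")
    barrier.wait()   # each thread blocks here until all three have arrived
    q.put("after")

threads = [threading.Thread(target=worker) for _ in range(num_threads)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(list(q.queue))  # always three "before"s followed by three "after"s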
I have written a bit of code to observe a race condition, but it doesn't happen.
from threading import Thread
from time import sleep

class SharedContent:
    def __init__(self, initia_value = 0) -> None:
        self.initial_value = initia_value

    def incerease(self, delta = 1):
        sleep(1)
        self.initial_value += delta

content = SharedContent(0)
threads: list[Thread] = []

for i in range(250):
    t = Thread(target=content.incerease)
    t.start()
    threads.append(t)

# wait until all threads have finished their job
while True:
    n = 0
    for t in threads:
        if t.is_alive():
            sleep(0.2)
            continue
        n += 1
    if n == len(threads):
        break

print(content.initial_value)
The output is 250, which implies no race condition happened. Why is that?
I even tried this with random sleep times, but the output was the same.
I changed your program. This version prints a different number every time I run it.
#!/usr/bin/env python3
from threading import Thread

class SharedContent:
    def __init__(self, initia_value = 0) -> None:
        self.initial_value = initia_value

    def incerease(self, delta = 1):
        for i in range(0, 1000000):
            self.initial_value += delta

content = SharedContent(0)
threads = []

for i in range(2):
    t = Thread(target=content.incerease)
    t.start()
    threads.append(t)

# wait until all threads have finished their job
for t in threads:
    t.join()

print(content.initial_value)
What I changed:
Only two threads instead of 250.
Got rid of sleep() calls.
Each thread increments the variable one million times instead of just one time.
Main program uses join() to wait for the threads to finish.
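If you then want the count to come out right even under this kind of contention, a minimal sketch (my addition, with the spelling of the names cleaned up) is to guard the read-modify-write with a threading.Lock:

from threading import Thread, Lock

class SharedContent:
    def __init__(self, initial_value=0) -> None:
        self.initial_value = initial_value
        self.lock = Lock()

    def increase(self, delta=1):
        for _ in range(1000000):
            with self.lock:              # serializes the read-modify-write
                self.initial_value += delta

content = SharedContent(0)
threads = [Thread(target=content.increase) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(content.initial_value)  # always 2000000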
#Version1
main_df = pd.read_csv('Million_rows.csv')

def myfunction(*args, start, end):
    for i in range(start, end):
        if condition1:
            for item in mainTreeSearch:
                ...
                lock.acquire()
                ### write to main_df
                lock.release()
                noLuck = False
                break
            if noLuck and Acondition:
                lock.acquire()
                ### write to main_df
                lock.release()
        elif ...:
            ... various asymmetric decision trees ...

t1 = Thread(target=myfunction, args=(*args, 0, 250))
t2 = Thread(target=myfunction, args=(*args, 250, 500))
t3 = Thread(target=myfunction, args=(*args, 500, 750))
t4 = Thread(target=myfunction, args=(*args, 750, 1000))
My problem is that I don't know how to feed the threads the rest of the rows; I have tried Queue, unsuccessfully.
#Version2
def myfunction(*args, q):
    while True:
        q.get()
        ....same search as above...without locking
        q.task_done()

q = Queue(maxsize=0)
num_threads = 5
threads = []

for i in range(num_threads):
    worker = Thread(target=myfunction, args=(args*))
    worker.setDaemon(True)
    threads.append(worker)
    worker.start()

for x in range(1000):
    #time.sleep(.005)
    q.put(x)

q.join()
In version 2, without the sleep, either one thread hogs all the data or random crashes happen.
In version 1, should I use threading's notify() mechanism, and if so, how is it implemented?
I reformatted it to this and it works as expected
from Queue import Queue
import threading

q = Queue()

def myfuntion(q):
    while True:
        val = q.get()
        print('\n' + str(threading.currentThread()))
        print('\n' + str(val))
        q.task_done()

num_threads = 5
threads = []

for i in range(num_threads):
    worker = threading.Thread(target=myfuntion, args=(q,))
    worker.setDaemon(True)
    threads.append(worker)
    worker.start()

for x in range(1000):
    q.put(x)

q.join()
Check it out. I think the way you are passing the parameters is wrong.
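For the original use case, the key point is that everything a worker needs, the queue plus any shared objects, has to be handed over through the args tuple. A minimal sketch (main_df and lock here are placeholders standing in for the asker's real objects):

import threading
from queue import Queue  # Python 3; on Python 2 use "from Queue import Queue"

q = Queue()
lock = threading.Lock()   # placeholder for the lock guarding main_df
main_df = {}              # placeholder for the shared DataFrame

def worker(q, lock, main_df):
    while True:
        row = q.get()
        # ... run the search for this row ...
        with lock:                      # serialize writes to the shared structure
            main_df[row] = "result"
        q.task_done()

for _ in range(5):
    t = threading.Thread(target=worker, args=(q, lock, main_df), daemon=True)
    t.start()

for row in range(1000):
    q.put(row)

q.join()  # returns once every queued row has been marked task_done()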
I modified the joinable-queues example from this link https://pymotw.com/2/multiprocessing/communication.html to run a function I wrote instead of a Task object. The modified code is listed below. The problem I am getting is that the consumers get poisoned without None ever being put in the tasks queue; they exit before completing the tasks. So I removed the check on None from the run function (commented out below), and I caught this exception:
'NoneType' object is not callable
I am sure that no None has been passed yet, since the message "Poisoning Consumers" has not been printed.
import multiprocessing as mp
import MyLib

# Subclass of Process
class Consumer(mp.Process):

    def __init__(self, task_queue, result_queue):
        mp.Process.__init__(self)
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.daemon = True

    # A method that defines the behavior of the process
    def run(self):
        proc_name = self.name
        while True:
            try:
                next_task = self.task_queue.get()
                # if next_task is None:
                #     # Poison pill means shutdown
                #     print('%s: Exiting' % proc_name)
                #     self.task_queue.task_done()
                #     break
                mxR, disC = next_task()
                self.task_queue.task_done()
                self.result_queue.put((mxR, disC))
            except Exception as e:
                print(e)
        return

if __name__ == '__main__':
    # Establish communication queues
    tasks = mp.JoinableQueue()
    results = mp.Queue()

    # Start consumers
    num_consumers = mp.cpu_count() * 2
    print('Creating %d consumers' % num_consumers)
    consumers = [ Consumer(tasks, results)
                  for i in range(num_consumers) ]
    for w in consumers:
        w.start()

    # Enqueue jobs
    trials = 10
    Tx_Range = 50
    prnts = 4
    for tx in list(range(30, 200, 20)):
        file_name = 'output_{}_{}.txt'.format(tx, prnts)
        output_file = open(file_name, 'a')
        output_file.write('Nodes\tTx_Range\tAvg_Rings\tAvg_Disc\n')
        for n in list(range(50, 101, 50)):
            ring_sum, disc_sum = 0, 0
            for i in range(0, trials):
                tasks.put(MyLib.GBMR_mp(1000, 1000, n, prnts, tx, False, results))
            print('Done putting jobs')
            for i in range(0, trials):
                mxR, discN = results.get()
                ring_sum += mxR
                disc_sum += discN
            avg_ring = ring_sum/trials
            avg_disc = disc_sum/trials
            print('Done Collecting Results, avg_disc = ', avg_disc, ' and avg_rings = ', avg_ring)
            s = '{}\t\t{}\t\t{}\t\t{}\n'.format(n, tx, avg_ring, avg_disc)
            print('Nodes', n, 'is Done for Tx_range', tx)
            output_file.write(s)
        output_file.close()

    # Add a poison pill for each consumer
    print('Poisoning Consumers')
    for i in range(num_consumers):
        tasks.put(None)

    # Wait for all of the tasks to finish
    tasks.join()
What could be the cause of this problem? Could it be that queue.get() is returning None?
Thanks in advance
Since I did not get any suggestions, I tried to solve the problem myself and came up with a solution (a bad one, I believe): I simply forgot about consumers and started each function run in its own Process. I kept the number of concurrent processes limited by checking the number of started ones, as shown below. But I am sure I am doing something wrong, because the performance of this solution is much worse than not using multiprocessing at all. With multiprocessing, the inner for loop on "n" takes about 2 minutes, but without multiprocessing it takes a few seconds. I am still a noob; can anyone point me in the right direction? Here is the code:
import multiprocessing as mp
import MyLib

if __name__ == '__main__':
    results = mp.Queue()
    num_consumers = mp.cpu_count()
    trials = 500
    prnts = 4
    num_of_proc = 0
    consumers = []
    joined = 0

    for tx in list(range(30, 200, 20)):
        file_name = 'Centered_BS_output_{}_{}.txt'.format(tx, prnts)
        output_file = open(file_name, 'a')
        output_file.write('Nodes\tTx_Range\tAvg_Rings\tAvg_Disc\n')
        for n in list(range(30, 1030, 30)):
            consumers.clear()
            ring_sum, disc_sum, joined, i, num_of_proc = 0, 0, 0, 0, 0
            #for i in range(0, trials):
            while i < trials:
                if num_of_proc < num_consumers:
                    consumers.append(mp.Process(target=MyLib.GBMR_mp, args=(1000, 1000, n, prnts, tx, False, results)))
                    consumers[i].daemon = True
                    consumers[i].start()
                    num_of_proc += 1
                    i += 1
                else:
                    consumers[joined].join()
                    num_of_proc -= 1
                    joined += 1
            print('Done putting jobs')
            for i in range(0, trials):
                mxR, discN = results.get()
                ring_sum += mxR
                disc_sum += discN
            avg_ring = ring_sum/trials
            avg_disc = disc_sum/trials
            print('Done Collecting Results, avg_disc = ', avg_disc, ' and avg_rings = ', avg_ring)
            s = '{}\t\t{}\t\t{}\t\t{}\n'.format(n, tx, avg_ring, avg_disc)
            print('Nodes', n, 'is Done for Tx_range', tx)
            output_file.write(s)
        output_file.close()
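For comparison, here is a minimal sketch of the same inner loop using a multiprocessing.Pool. This is only a suggestion, not the asker's code, and it assumes MyLib.GBMR_mp can be changed to simply return its (mxR, disC) pair instead of writing to a results queue:

import multiprocessing as mp
import MyLib  # the asker's module; GBMR_mp is assumed here to return (mxR, disC)

def one_trial(params):
    n, prnts, tx = params
    return MyLib.GBMR_mp(1000, 1000, n, prnts, tx, False)

if __name__ == '__main__':
    trials = 500
    prnts = 4
    pool = mp.Pool(processes=mp.cpu_count())
    for tx in range(30, 200, 20):
        for n in range(30, 1030, 30):
            # run the independent trials in parallel and collect their results
            results = pool.map(one_trial, [(n, prnts, tx)] * trials)
            avg_ring = sum(r[0] for r in results) / trials
            avg_disc = sum(r[1] for r in results) / trials
            print('Nodes', n, 'Tx_range', tx, avg_ring, avg_disc)
    pool.close()
    pool.join()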
So I've got this code for producers and consumers:
import threading
import time
import random

N = 8
buffer = N * [None]
free = threading.Semaphore(N)
items = threading.Semaphore(0)

def prod():
    n = 0
    i = 0
    while True:
        time.sleep(random.random())
        free.acquire()
        buffer[i] = n
        i = (i + 1) % N
        n += 1
        items.release()

def cons():
    i = 0
    while True:
        time.sleep(random.random())
        items.acquire()
        print(buffer[i])
        i = (i + 1) % N
        free.release()

def main():
    p = threading.Thread(target=prod, args=[])
    c = threading.Thread(target=cons, args=[])
    p.start()
    c.start()
    p.join()
    c.join()

main()
But I want to have three producer threads and three consumer threads. Can someone suggest a way I could do this using a third semaphore? Thanks.
Assuming this is not homework about semaphores and you want a real solution, you should use the Queue object, which can handle all of this by itself. If I understood correctly, you want three producers and three consumers that share one buffer that can hold at most 8 items. If that's the case, the code can be simplified to something like this:
import threading
import time
import random
import Queue

def prod(queue):
    n = 0
    while True:
        time.sleep(random.random())
        queue.put(n)
        n += 1

def cons(queue):
    while True:
        time.sleep(random.random())
        n = queue.get()
        print n

def main():
    N = 8
    queue = Queue.Queue(N)
    threads = []
    for i in range(3):
        threads.append(threading.Thread(target=cons, args=[queue]))
        threads.append(threading.Thread(target=prod, args=[queue]))
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()  # this will never really finish, because the threads run forever

main()
If you are interested in how the queue is implemented internally, you can see the source code here.
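If the exercise does require semaphores, the classic approach (my sketch of the textbook solution, not part of the original answer) is to add a mutex as the third semaphore, so that several producers and consumers can safely share the buffer indices:

import threading
import time
import random

N = 8
buffer = N * [None]
free = threading.Semaphore(N)    # counts empty slots
items = threading.Semaphore(0)   # counts filled slots
mutex = threading.Semaphore(1)   # the third semaphore: protects the shared indices
in_i, out_i, n = 0, 0, 0         # shared write index, read index, next value to produce

def prod():
    global in_i, n
    while True:
        time.sleep(random.random())
        free.acquire()
        with mutex:               # only one thread touches the indices at a time
            buffer[in_i] = n
            in_i = (in_i + 1) % N
            n += 1
        items.release()

def cons():
    global out_i
    while True:
        time.sleep(random.random())
        items.acquire()
        with mutex:
            value = buffer[out_i]
            out_i = (out_i + 1) % N
        free.release()
        print(value)

threads = [threading.Thread(target=prod) for _ in range(3)] + \
          [threading.Thread(target=cons) for _ in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()  # like the original, this runs forever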
I have a list of about 15 years in the year_queue, and I need to spawn one process for each year. But depending on which server I am running the code on, the number of processors varies. How do I dynamically vary the variable num_processes depending on the number of processors in the server?
If I set num_processes > the number of processors, would it automatically spawn accordingly? When I test this, it creates 15 processes and splits the CPU power between them. I am looking for a way to first create 'n' processes, where n = the number of processors in the server, and then, as each of those processes finishes, spawn the next one.
for i in range(num_processes):
    worker = ForEachPerson(year_queue, result_queue, i, dict_of_files)
    print "worker spawned for " + str(i)
    worker.start()

results = []
while len(results) < len(years):
    result = result_queue.get()
    results.append(result)
Anyone had the same issue?
while year_queue.empty() != True:
    for i in range(num_processes):
        worker = ForEachPerson(year_queue, result_queue, i, dict_of_files)
        print "worker spawned for " + str(i)
        worker.start()

# collect results off the queue
print "results being collected"
results = []
while len(results) < num_processes:
    result = result_queue.get()
    results.append(result)
Use a multiprocessing Pool. The class does all the tedious work of selecting the right number of processes and running them for you. It also doesn't spawn a new process for each task, but reuses processes once they're done.
import multiprocessing

def process_year(year):
    ...
    return result

pool = multiprocessing.Pool()
# here year_queue must be a plain list of the years, not a multiprocessing.Queue
results = pool.map(process_year, year_queue)
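To answer the sizing part of the question directly: Pool() already defaults to one worker process per CPU, and you can also pass the count explicitly. A minimal self-contained sketch (process_year here is just a placeholder for the real per-year work):

import multiprocessing

def process_year(year):
    # placeholder for the real per-year work
    return year * 2

if __name__ == '__main__':
    years = list(range(1996, 1996 + 15))        # the 15 years
    n = multiprocessing.cpu_count()             # size the pool to the machine
    pool = multiprocessing.Pool(processes=n)
    results = pool.map(process_year, years)     # at most n years run at once
    pool.close()
    pool.join()
    print(results)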
from multiprocessing import Process, Queue, cpu_count
from Queue import Empty

class ForEachPerson(Process):
    def __init__(self, year_queue, result_queue, i, dict_of_files):
        self.year_queue = year_queue
        self.result_queue = result_queue
        self.i = i
        self.dict_of_files = dict_of_files
        super(ForEachPerson, self).__init__()

    def run(self):
        while True:
            try:
                year = self.year_queue.get()
                ''' Do something '''
                self.result_queue.put(year)
            except Empty:
                self.result_queue.close()
                return

if __name__ == '__main__':
    year_queue = Queue()
    result_queue = Queue()
    dict_of_files = {}
    start_year = 1996
    num_years = 15

    for year in range(start_year, start_year + num_years):
        year_queue.put(year)

    workers = []
    for i in range(cpu_count()):
        worker = ForEachPerson(year_queue, result_queue, i, dict_of_files)
        print 'worker spawned for', str(i)
        worker.start()
        workers.append(worker)

    results = []
    while len(results) < num_years:
        try:
            year = result_queue.get()
            results.append(year)
            print 'Result:', year
        except Empty:
            pass

    for worker in workers:
        worker.terminate()