class Job(object):
def __init__(self, name):
self.name = name
self.depends = []
self.waitcount = 0
    def work(self):
        # does some work and returns True on success (stubbed out here)
        pass
    def add_dependent(self, another_job):
self.depends.append(another_job)
self.waitcount += 1
So waitcount is the number of jobs you have in depends.
job_board = {}
# create a dependency tree
for i in range(1000):
# create random jobs
j = Job(<new name goes here>)
# add jobs to depends if dependent
# record it in job_board
job_board[j.name] = j
# example
# jobC is in self.depends of jobA and jobB
# jobC would have a waitcount of 2
rdyQ = Queue.Queue()
def worker():
    while True:
        job = rdyQ.get()
        success = job.work()
        # if this job was successful create dependent jobs
        if success:
            for dependent_job in job.depends:
                dependent_job.waitcount -= 1
                if dependent_job.waitcount == 0:
                    rdyQ.put(dependent_job)
And then I would create threads:
for i in range(10):
t = threading.Thread( target=worker )
t.daemon=True
t.start()
for job_name, job_obj in job_board.iteritems():
if job_obj.waitcount == 0:
rdyQ.put(job_obj)
while True:
    # wait here until all jobs are finished
Now, back to the example above: jobC is in self.depends of both jobA and jobB, so jobC has a waitcount of 2.
In this scenario, if jobA and jobB were both running and both tried to decrement jobC's waitcount at the same time, weird things were happening.
So I added a lock:
waitcount_lock = threading.Lock()
and changed this code to:
# if this job was successful create dependent jobs
if success:
for dependent_job in job.depends:
with waitcount_lock:
dependent_job.waitcount -= 1
if dependent_job.waitcount == 0:
rdyQ.put(dependent_job)
But strange things still happened; e.g., the same job was being processed by multiple threads, as if it had been put into the queue twice.
Is it not best practice to modify nested objects when complex objects are being passed amongst threads?
Here's a complete, executable program that appears to work fine. I expect you're mostly seeing "weird" behavior because, as I suggested in a comment, you're counting job successors instead of job predecessors. So I renamed things with "succ" and "pred" in their names to make that much clearer. daemon threads are also usually a Bad Idea, so this code arranges to shut down all the threads cleanly when the work is over. Note too the use of assertions to verify that implicit beliefs are actually true ;-)
import threading
import Queue
import random
NTHREADS = 10
NJOBS = 10000
class Job(object):
def __init__(self, name):
self.name = name
self.done = False
self.succs = []
self.npreds = 0
def work(self):
assert not self.done
self.done = True
return True
def add_dependent(self, another_job):
self.succs.append(another_job)
another_job.npreds += 1
def worker(q, lock):
while True:
job = q.get()
if job is None:
break
success = job.work()
if success:
for succ in job.succs:
with lock:
assert succ.npreds > 0
succ.npreds -= 1
if succ.npreds == 0:
q.put(succ)
q.task_done()
jobs = [Job(i) for i in range(NJOBS)]
for i, job in enumerate(jobs):
# pick some random successors
possible = xrange(i+1, NJOBS)
succs = random.sample(possible,
min(len(possible),
random.randrange(10)))
for succ in succs:
job.add_dependent(jobs[succ])
q = Queue.Queue()
for job in jobs:
if job.npreds == 0:
q.put(job)
print q.qsize(), "ready jobs initially"
lock = threading.Lock()
threads = [threading.Thread(target=worker,
args=(q, lock))
for _ in range(NTHREADS)]
for t in threads:
t.start()
q.join()
# add sentinels so threads end cleanly
for t in threads:
q.put(None)
for t in threads:
t.join()
for job in jobs:
assert job.done
assert job.npreds == 0
CLARIFYING THE LOCK
In a sense, the lock in this code protects "too much". The potential problem it's addressing is that multiple threads may try to decrement the .npreds member of the same Job object simultaneously. Without mutual exclusion, the stored value at the end of that may be anywhere from 1 smaller than its initial value, to the correct result (the initial value minus the number of threads trying to decrement it).
But there's no need to also mutate the queue under lock protection. Queues do their own thread-safe locking. So, e.g., the code could be written like so instead:
for succ in job.succs:
with lock:
npreds = succ.npreds = succ.npreds - 1
assert npreds >= 0
if npreds == 0:
q.put(succ)
It's generally best practice to hold a lock for as little time as possible. However, I find this rewrite harder to follow. Pick your poison ;-)
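To make the hazard concrete: without the lock, the decrement and the == 0 test are separate operations on succ.npreds, so two predecessors finishing at about the same time can both observe zero (and queue the same successor twice), or both observe a stale value (and never queue it). Here is a hedged, single-threaded sketch that simply replays one such schedule step by step; no real threads are involved:
import Queue

class Job(object):
    def __init__(self):
        self.npreds = 2         # two predecessors, as in the jobA/jobB/jobC example

q = Queue.Queue()
jobC = Job()

# One possible schedule of two workers A and B, each running the unlocked
#     succ.npreds -= 1
#     if succ.npreds == 0: q.put(succ)
a = jobC.npreds                 # A reads 2
jobC.npreds = a - 1             # A writes 1, then the scheduler switches to B
b = jobC.npreds                 # B reads 1
jobC.npreds = b - 1             # B writes 0
if jobC.npreds == 0:
    q.put(jobC)                 # B enqueues jobC
if jobC.npreds == 0:            # A resumes its test and re-reads the counter
    q.put(jobC)                 # A enqueues jobC again -> worked on twice
print q.qsize()                 # 2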
Related
I have written a bit of code to observe a race condition, but it doesn't happen.
from time import sleep
from threading import Thread

class SharedContent:
def __init__(self, initia_value = 0) -> None:
self.initial_value = initia_value
    def incerease(self, delta=1):
sleep(1)
self.initial_value += delta
content = SharedContent(0)
threads: list[Thread] = []
for i in range(250):
t = Thread(target=content.incerease)
t.start()
threads.append(t)
#wait until all threads have finished their job
while True:
n = 0
for t in threads:
if t.is_alive():
sleep(0.2)
continue
n += 1
if n == len(threads):
break
print(content.initial_value)
The output is 250, which implies no race condition has happened!
Why is that?
I even tried this with random sleep times, but the output was the same.
I changed your program. This version prints a different number every time I run it.
#!/usr/bin/env python3
from threading import Thread
class SharedContent:
def __init__(self, initia_value = 0) -> None:
self.initial_value = initia_value
    def incerease(self, delta=1):
for i in range(0, 1000000):
self.initial_value += delta
content = SharedContent(0)
threads = []
for i in range(2):
t = Thread(target=content.incerease)
t.start()
threads.append(t)
#wait until all threads have finished their job
for t in threads:
t.join()
print(content.initial_value)
What I changed:
Only two threads instead of 250.
Got rid of sleep() calls.
Each thread increments the variable one million times instead of just one time.
Main program uses join() to wait for the threads to finish.
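If the goal were a correct counter rather than demonstrating the race, the usual fix is to protect the read-modify-write with a lock. A minimal sketch, assuming a cleaned-up variant of the class above (identifiers renamed, and a hypothetical lock added; it is not the original code):
#!/usr/bin/env python3
from threading import Thread, Lock

class SharedContent:
    def __init__(self, initial_value=0):
        self.initial_value = initial_value
        self.lock = Lock()

    def increase(self, delta=1):
        for _ in range(1000000):
            with self.lock:           # serialize the +=, so no update is lost
                self.initial_value += delta

content = SharedContent(0)
threads = [Thread(target=content.increase) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(content.initial_value)          # always 2000000 with the lock in place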
I am trying to make two processes communicate with each other using the multiprocessing package in Python, more precisely the Queue() class. From the parent process, I want to get an updated value from the child process every 5 seconds. The child process runs a function defined on a class. I have made a toy example where everything works fine.
However, when I try to implement this solution in my project, it seems that the Queue.put() call in the child process's sub-module doesn't send anything back to the parent process: the parent process never prints the desired value and the code never stops running. Actually, the parent process only prints the value it sent to the child process, which is True here, and, as I said, never stops.
So my questions are:
Is there any error in my toy example?
How should I modify my project in order to get it working just like my toy example?
Toy example: works
main module
from multiprocessing import Process, Event, Lock, Queue, Pipe
import time
import test_mod as test
def loop(output):
stop_event = Event()
q = Queue()
child_process = Process(target=test.child.sub, args=(q,))
child_process.start()
i = 0
print("started at {} ".format(time.time()))
while not stop_event.is_set():
i+=1
time.sleep(5)
q.put(True)
print(q.get())
if i == 5:
child_process.terminate()
stop_event.set()
output.put("main process looped")
if __name__ == '__main__':
stop_event, output = Event(), Queue()
k = 0
while k < 5:
loop_process = Process(target=loop, args=(output,))
loop_process.start()
print(output.get())
loop_process.join()
k+=1
submodule
from multiprocessing import Process, Event, Lock, Queue, Pipe
import time
class child(object):
def __init__(self):
pass
def sub(q):
i = 0
while i < 2000:
latest_value = time.time()
accord = q.get()
if accord == True:
q.put(latest_value)
accord = False
time.sleep(0.0000000005)
i+=1
Project code: doesn't work
main module
import neat #package in which the submodule is
import *some other stuff*
def run(config_file):
config = neat.Config(some configuration)
p = neat.Population(config)
**WHERE MY PROBLEM IS**
stop_event = Event()
q = Queue()
pe = neat.ParallelEvaluator(**args)
child_process = Process(target=p.run, args=(pe.evaluate, q, other args))
child_process.start()
i = 0
while not stop_event.is_set():
q.put(True)
print(q.get())
time.sleep(5)
i += 1
if i == 5:
child_process.terminate()
stop_event.set()
if __name__ == '__main__':
run(config_file)
submodule
class Population(object):
def __init__():
*initialization*
def run(self, q, other args):
while n is None or k < n:
*some stuff*
accord = add_2.get()
if accord == True:
add_2.put(self.best_genome.fitness)
accord = False
return self.best_genome
NB:
I am not used to multiprocessing
I have tried to give the most relevant parts of my project, given that the entire code would be far too long.
I have also considered using Pipe(); however, that option didn't work either.
If I see it correctly, your desired submodule is the class Population. However, you start your process with a parameter of the type ParallelEvaluator. Next, I can't see that you supply your Queue q to the sub-Process. That's what I see from the code provided:
stop_event = Event()
q = Queue()
pe = neat.ParallelEvaluator(**args)
child_process = Process(target=p.run, args=(pe.evaluate, **args))
child_process.start()
Moreover, the following lines create a race condition:
q.put(True)
print(q.get())
The get command is like a pop: it takes an element and deletes it from the queue. If your sub-process doesn't access the queue between these two lines (because it is busy), the True will never make it to the child process. Hence, it is better to use multiple queues, one for each direction. Something like:
stop_event = Event()
q_in = Queue()
q_out = Queue()
pe = neat.ParallelEvaluator(**args)
child_process = Process(target=p.run, args=(pe.evaluate, **args))
child_process.start()
i = 0
while not stop_event.is_set():
q_in.put(True)
print(q_out.get())
time.sleep(5)
i += 1
if i == 5:
child_process.terminate()
stop_event.set()
This is your submodule:
class Population(object):
def __init__():
*initialization*
def run(self, **args):
while n is None or k < n:
*some stuff*
accord = add_2.get() # add_2 = q_in
if accord == True:
add_3.put(self.best_genome.fitness) #add_3 = q_out
accord = False
return self.best_genome
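For reference, here is a minimal, self-contained sketch of that two-queue pattern, independent of neat (the child function and the q_in/q_out names are made up for illustration): the parent posts a request on one queue and the child replies with its latest value on the other, so neither side can consume its own message.
from multiprocessing import Process, Queue
from queue import Empty
import time

def child(q_in, q_out):
    latest_value = None
    while True:
        latest_value = time.time()       # stand-in for "some stuff" that updates a value
        try:
            request = q_in.get_nowait()  # non-blocking: keep working between requests
        except Empty:
            time.sleep(0.01)
            continue
        if request is None:              # sentinel: parent asked us to stop
            break
        q_out.put(latest_value)          # reply on the other queue

if __name__ == '__main__':
    q_in, q_out = Queue(), Queue()
    p = Process(target=child, args=(q_in, q_out))
    p.start()
    for _ in range(5):
        time.sleep(1)                    # 5 seconds in the question, shortened here
        q_in.put(True)                   # request the latest value
        print(q_out.get())               # blocks until the child answers
    q_in.put(None)                       # ask the child to exit
    p.join()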
Given the following class:
from abc import ABCMeta, abstractmethod
from time import sleep
import threading
from threading import active_count, Thread
class ScraperPool(metaclass=ABCMeta):
Queue = []
ResultList = []
def __init__(self, Queue, MaxNumWorkers=0, ItemsPerWorker=50):
# Initialize attributes
self.MaxNumWorkers = MaxNumWorkers
self.ItemsPerWorker = ItemsPerWorker
self.Queue = Queue # For testing purposes.
def initWorkerPool(self, PrintIDs=True):
for w in range(self.NumWorkers()):
Thread(target=self.worker, args=(w + 1, PrintIDs,)).start()
sleep(1) # Explicitly wait one second for this worker to start.
def run(self):
self.initWorkerPool()
# Wait until all workers (i.e. threads) are done.
while active_count() > 1:
print("Active threads: " + str(active_count()))
sleep(5)
self.HandleResults()
def worker(self, id, printID):
if printID:
print("Starting worker " + str(id) + ".")
while (len(self.Queue) > 0):
self.scraperMethod()
if printID:
print("Worker " + str(id) + " is quiting.")
        # TODO: kill this thread.
return
def NumWorkers(self):
return 1 # Simplified for testing purposes.
    @abstractmethod
def scraperMethod(self):
pass
class TestScraper(ScraperPool):
def scraperMethod(self):
# print("I am scraping.")
# print("Scraping. Threads#: " + str(active_count()))
temp_item = self.Queue[-1]
self.Queue.pop()
self.ResultList.append(temp_item)
def HandleResults(self):
print(self.ResultList)
ScraperPool.register(TestScraper)
scraper = TestScraper(Queue=["Jaap", "Piet"])
scraper.run()
print(threading.active_count())
# print(scraper.ResultList)
When all the threads are done, there's still one active thread - threading.active_count() on the last line gets me that number.
The active thread is <_MainThread(MainThread, started 12960)> - as printed with threading.enumerate().
Can I assume that all my threads are done when active_count() == 1?
Or can, for instance, imported modules start additional threads, so that my threads are actually done while active_count() > 1, which is also the condition for the loop I'm using in the run method?
You can assume that your threads are done when active_count() reaches 1. The problem is, if any other module creates a thread, you'll never get to 1. You should manage your threads explicitly.
Example: You can put the threads in a list and join them one at a time. The relevant changes to your code are:
def __init__(self, Queue, MaxNumWorkers=0, ItemsPerWorker=50):
# Initialize attributes
self.MaxNumWorkers = MaxNumWorkers
self.ItemsPerWorker = ItemsPerWorker
self.Queue = Queue # For testing purposes.
self.WorkerThreads = []
def initWorkerPool(self, PrintIDs=True):
for w in range(self.NumWorkers()):
thread = Thread(target=self.worker, args=(w + 1, PrintIDs,))
self.WorkerThreads.append(thread)
thread.start()
sleep(1) # Explicitly wait one second for this worker to start.
def run(self):
self.initWorkerPool()
# Wait until all workers (i.e. threads) are done. Waiting in order
# so some threads further in the list may finish first, but we
# will get to all of them eventually
        while self.WorkerThreads:
            self.WorkerThreads.pop(0).join()
self.HandleResults()
According to the docs, active_count() includes the main thread, so if you're at 1 then you're most likely done, but if you have another source of new threads in your program then you may be done before active_count() hits 1.
I would recommend implementing an explicit join method on your ScraperPool: keep track of your workers and explicitly join them to the main thread when needed, instead of checking that you're done with active_count() calls (see the sketch below).
Also, remember the GIL...
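A minimal sketch of that recommendation, separate from the scraper class (the names here are made up for illustration): the pool keeps references to the threads it started and joins exactly those, so threads created elsewhere in the process never affect its shutdown logic.
import threading
import time

class WorkerPool(object):
    def __init__(self, num_workers, work):
        self.workers = [threading.Thread(target=work) for _ in range(num_workers)]
    def start(self):
        for t in self.workers:
            t.start()
    def join(self):
        # wait only for the threads this pool started, regardless of what
        # active_count() says about the rest of the process
        for t in self.workers:
            t.join()

def work():
    time.sleep(0.5)  # stand-in for the actual scraping

pool = WorkerPool(3, work)
pool.start()
pool.join()
print(threading.active_count())  # 1 here, unless some other module started threads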
I am using a process pool (with 3 processes). In every process, I have created some threads using the threading module to speed up the handling of some work.
At first, everything was OK. But when I wanted to change some variable in a thread, I met an odd situation.
For testing, and to understand what happens, I set up a global variable COUNT. Honestly, I don't know whether this is safe or not. I just want to see whether, by using multiprocessing and threading, I can change COUNT or not.
#!/usr/bin/env python
# encoding: utf-8
import os
import threading
from Queue import Queue, Empty
from multiprocessing import Process, Pool
# global variable
max_threads = 11
Stock_queue = Queue()
COUNT = 0
class WorkManager:
def __init__(self, work_queue_size=1, thread_pool_size=1):
self.work_queue = Queue()
        self.thread_pool = []  # initially, no threads
self.work_queue_size = work_queue_size
self.thread_pool_size = thread_pool_size
self.__init_work_queue()
self.__init_thread_pool()
def __init_work_queue(self):
for i in xrange(self.work_queue_size):
self.work_queue.put((func_test, Stock_queue.get()))
def __init_thread_pool(self):
for i in xrange(self.thread_pool_size):
self.thread_pool.append(WorkThread(self.work_queue))
def finish_all_threads(self):
for i in xrange(self.thread_pool_size):
if self.thread_pool[i].is_alive():
self.thread_pool[i].join()
class WorkThread(threading.Thread):
def __init__(self, work_queue):
threading.Thread.__init__(self)
self.work_queue = work_queue
self.start()
def run(self):
while self.work_queue.qsize() > 0:
try:
func, args = self.work_queue.get(block=False)
func(args)
            except Empty:
print 'queue is empty....'
def handle(process_name):
print process_name, 'is running...'
work_manager = WorkManager(Stock_queue.qsize()/3, max_threads)
work_manager.finish_all_threads()
def func_test(num):
# use a global variable to test what happens
global COUNT
COUNT += num
def prepare():
# prepare test queue, store 50 numbers in Stock_queue
for i in xrange(50):
Stock_queue.put(i)
def main():
prepare()
pools = Pool()
# set 3 process
for i in xrange(3):
pools.apply_async(handle, args=('process_'+str(i),))
pools.close()
pools.join()
global COUNT
print 'COUNT: ', COUNT
if __name__ == '__main__':
os.system('printf "\033c"')
main()
Now, the final result of COUNT is just 0. I am unable to understand what's happening here.
You print the COUNT variable in the parent process. Variables don't sync across processes because they don't share memory; that means the variable stays 0 in the parent process and is only increased in the subprocesses.
In the case of threading, threads share memory, which means they share the COUNT variable, so within each subprocess COUNT should end up greater than 0. But again, those threads live in the subprocesses, and when they change the variable, the change doesn't show up in other processes.
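If the parent actually needs to see the count, one option is to keep the counter in shared memory, e.g. a multiprocessing.Value guarded by its own lock and handed to the pool workers through an initializer. A minimal sketch (simplified, not the original WorkManager code; the init_worker and add names are made up):
from multiprocessing import Pool, Value

def init_worker(shared):
    # give every worker process a handle to the same shared counter
    global COUNT
    COUNT = shared

def add(num):
    with COUNT.get_lock():    # serialize the read-modify-write across processes
        COUNT.value += num

if __name__ == '__main__':
    COUNT = Value('i', 0)     # a C int living in shared memory
    pool = Pool(3, initializer=init_worker, initargs=(COUNT,))
    pool.map(add, range(50))
    pool.close()
    pool.join()
    print(COUNT.value)        # 1225 == sum(range(50))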
I have an application that fires up a series of threads. Occasionally, one of these threads dies (usually due to a network problem). How can I properly detect a thread crash and restart just that thread? Here is example code:
import random
import threading
import time
class MyThread(threading.Thread):
def __init__(self, pass_value):
super(MyThread, self).__init__()
self.running = False
self.value = pass_value
def run(self):
self.running = True
while self.running:
time.sleep(0.25)
rand = random.randint(0,10)
print threading.current_thread().name, rand, self.value
if rand == 4:
raise ValueError('Returned 4!')
if __name__ == '__main__':
group1 = []
group2 = []
for g in range(4):
group1.append(MyThread(g))
group2.append(MyThread(g+20))
for m in group1:
m.start()
print "Now start second wave..."
for p in group2:
p.start()
In this example, I start 4 threads, then I start 4 more threads. Each thread randomly generates an int between 0 and 10. If that int is 4, it raises an exception. Notice that I don't join the threads: I want both the group1 and group2 lists of threads to be running. I found that if I joined the threads, the program would wait until each thread terminated, and the next set of threads wouldn't begin. My threads are supposed to behave like daemon processes, should rarely (if ever) hit the ValueError this example code raises, and should be running constantly.
How can I detect that a specific thread died and restart just that one thread?
I have attempted the following loop right after my for p in group2 loop.
while True:
# Create a copy of our groups to iterate over,
# so that we can delete dead threads if needed
for m in group1[:]:
if not m.isAlive():
group1.remove(m)
group1.append(MyThread(1))
for m in group2[:]:
if not m.isAlive():
group2.remove(m)
group2.append(MyThread(500))
time.sleep(5.0)
I took this method from this question.
The problem with this is that isAlive() seems to always return True (and so the threads never get restarted).
Edit
Would it be more appropriate in this situation to use multiprocessing? I found this tutorial. Is it more appropriate to have separate processes if I am going to need to restart the process? It seems that restarting a thread is difficult.
It was mentioned in the comments that I should check is_active() against the thread. I don't see this mentioned in the documentation, but I do see the isAlive that I am currently using. As I mentioned above, though, this returns True, thus I'm never able to see that a thread has died.
I had a similar issue and stumbled across this question. I found that join takes a timeout argument, and that is_alive will return False once the thread is joined. So my audit for each thread is:
def check_thread_alive(thr):
thr.join(timeout=0.0)
return thr.is_alive()
This detects thread death for me.
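For example, a supervisor loop along the lines of the one in the question could use that check and replace only the threads that have actually died (note that the replacement has to be a new, started Thread object; a finished thread can't be restarted):
while True:
    for group in (group1, group2):
        for m in group[:]:
            if not check_thread_alive(m):
                replacement = MyThread(m.value)
                replacement.start()      # a dead Thread object cannot be restarted
                group.remove(m)
                group.append(replacement)
    time.sleep(5.0)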
You could potentially put a try/except around where you expect it to crash (if it could be anywhere, you can wrap the whole run function) and have an indicator variable that holds the thread's status.
So something like the following:
class MyThread(threading.Thread):
def __init__(self, pass_value):
super(MyThread, self).__init__()
self.running = False
self.value = pass_value
self.RUNNING = 0
self.FINISHED_OK = 1
self.STOPPED = 2
self.CRASHED = 3
self.status = self.STOPPED
def run(self):
self.running = True
self.status = self.RUNNING
while self.running:
time.sleep(0.25)
rand = random.randint(0,10)
print threading.current_thread().name, rand, self.value
try:
if rand == 4:
raise ValueError('Returned 4!')
            except Exception:
                self.status = self.CRASHED
                return
Then you can use your loop:
while True:
# Create a copy of our groups to iterate over,
# so that we can delete dead threads if needed
    for m in group1[:]:
        if m.status == m.CRASHED:
            value = m.value
            group1.remove(m)
            replacement = MyThread(value)
            replacement.start()  # a fresh Thread object must be started
            group1.append(replacement)
    for m in group2[:]:
        if m.status == m.CRASHED:
            value = m.value
            group2.remove(m)
            replacement = MyThread(value)
            replacement.start()
            group2.append(replacement)
time.sleep(5.0)