Python multi-thread timing

I'm trying to get the hang of multi-threading in Python.
I wrote this snippet of code:
import requests
import threading
from time import time, sleep

start_time = time()

class myThread(threading.Thread):
    def __init__(self, threadID, name):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name

    def run(self):
        print(self.name + " Starting")
        start = time()
        url = 'https://stackoverflow.com'
        for i in range(20):
            res = requests.post(url, timeout=5)
        end = time()
        print("Final Thread Time: " + str(end - start) + ' For Thread {}'.format(self.threadID))

threads = []
threadID = 1
for i in range(1, 6):
    thread = myThread(threadID, 'thread{}'.format(threadID))
    thread.start()
    threads.append(thread)
    threadID += 1

for t in threads:
    t.join()

final_time_end = time()
print("Final Time: " + str(final_time_end - start_time))
So basically it's sending 20 POST requests to a URL in each thread.
The result is:
Final Thread Time: 6.0695300102 For Thread 3
Final Thread Time: 6.8553800583 For Thread 1
Final Thread Time: 6.9735219479 For Thread 5
Final Thread Time: 6.5822350979 For Thread 4
Final Thread Time: 11.330765152 For Thread 2
I have a dual-core CPU. What I don't understand is why, almost every time I run this script, four threads finish at nearly the same time (about 6 s) while one thread takes about 11 s (though sometimes they all finish at the same time).
The same thing happens when I run this code with 4 or 3 threads: the last one usually takes noticeably longer to finish.
What is happening in that last thread that makes it take more time?

Related

Race condition doesn't happen

I have written a bit of code to try to observe a race condition, but it doesn't happen.
from threading import Thread
from time import sleep

class SharedContent:
    def __init__(self, initia_value = 0) -> None:
        self.initial_value = initia_value

    def incerease(self, delta = 1):
        sleep(1)
        self.initial_value += delta

content = SharedContent(0)
threads: list[Thread] = []
for i in range(250):
    t = Thread(target=content.incerease)
    t.start()
    threads.append(t)

# wait until all threads have finished their job
while True:
    n = 0
    for t in threads:
        if t.is_alive():
            sleep(0.2)
            continue
        n += 1
    if n == len(threads):
        break

print(content.initial_value)
The output is 250, which implies that no race condition has happened!
Why is that?
I even tried this with random sleep times, but the output was the same.
I changed your program. This version prints a different number every time I run it.
#!/usr/bin/env python3
from threading import Thread

class SharedContent:
    def __init__(self, initia_value = 0) -> None:
        self.initial_value = initia_value

    def incerease(self, delta = 1):
        for i in range(0, 1000000):
            self.initial_value += delta

content = SharedContent(0)
threads = []
for i in range(2):
    t = Thread(target=content.incerease)
    t.start()
    threads.append(t)

# wait until all threads have finished their job
for t in threads:
    t.join()

print(content.initial_value)
What I changed:
Only two threads instead of 250.
Got rid of sleep() calls.
Each thread increments the variable one million times instead of just one time.
Main program uses join() to wait for the threads to finish.
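Why this exposes the race: self.initial_value += delta is not atomic. It compiles to separate load, add, and store bytecode instructions, and the interpreter may switch threads between them. With 250 threads each doing a single increment after a one-second sleep, a collision is very unlikely; with two threads each doing a million increments, a lost update is nearly certain. A minimal sketch (the Box class and increment function are illustrative, not from the question) that makes the separate steps visible with the standard dis module:

import dis

class Box:
    def __init__(self):
        self.value = 0

def increment(box):
    # One statement, but several bytecode instructions: load the
    # attribute, add 1, store the attribute. A thread switch can
    # happen between any two of them, losing an update.
    box.value += 1

# Disassemble to see the separate LOAD / ADD / STORE steps.
dis.dis(increment)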

Memory Shared with threading

I have a problem trying to share memory between threads. I want the counter to be shared so that all the threads together count up to a certain number (100 in this case), and the final value is returned to the main thread. The problem is that, even with a lock, each thread keeps its own separate count.
import threading
from threading import Thread, Lock
import time
import multiprocessing
import random

def create_workers(n_threads, counter):
    # counter = 0
    workers = []
    for n in range(n_threads):
        worker = DataCampThread('Thread - ' + str(n), counter)
        workers.append(worker)
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    return counter

def thread_delay(thread_name, num, delay):
    num += 1
    time.sleep(delay)
    print(thread_name, '-------->', num)
    return num

class DataCampThread(Thread):
    def __init__(self, name, cou):
        Thread.__init__(self)
        self.name = name
        self.counter = cou
        delay = random.randint(1, 2)
        self.delay = delay
        self.lock = Lock()

    def run(self):
        print('Starting Thread:', self.name)
        while self.counter < 100:
            self.lock.acquire()
            self.counter = thread_delay(self.name, self.counter, self.delay)
            self.lock.release()
        print('Execution of Thread:', self.name, 'is complete!')

if __name__ == '__main__':
    # create the agent
    n_threads = 3  # multiprocessing.cpu_count()
    counter = 0
    create_workers(n_threads, counter)
    print(counter)
    print("Thread execution is complete!")
As I mentioned in the comments, I'm not really sure what you're trying to do — but here's an uninformed guess to (hopefully) expedite things.
Based on your response to the initial version of my answer about wanting to avoid a global variable, the counter is now a class attribute that will automatically be shared by all instances of the class. Each thread has its own name and randomly selected amount of time it delays between updates to the shared class attribute named counter.
Note: The test code redefines the print() function to prevent it from being used by more than one thread at a time.
import threading
from threading import Thread, Lock
import time
import random

MAXVAL = 10

class DataCampThread(Thread):
    counter = 0            # Class attribute.
    counter_lock = Lock()  # Control concurrent access to shared class attribute.

    def __init__(self, name):
        super().__init__()  # Initialize base class.
        self.name = name
        self.delay = random.randint(1, 2)

    def run(self):
        print('Starting Thread:', self.name)
        while True:
            with self.counter_lock:
                if self.counter >= MAXVAL:
                    break  # Exit while loop (also releases lock).
                # self.counter += 1  # DON'T USE - would create an instance-level attribute.
                type(self).counter += 1  # Update class attribute.
                print(self.name, '-------->', self.counter)
            time.sleep(self.delay)
        print('Execution of Thread:', self.name, 'is complete!')

def main(n_threads, maxval):
    ''' Create and start worker threads, then wait for them all to finish. '''
    workers = [DataCampThread(name=f'Thread #{i}') for i in range(n_threads)]
    for worker in workers:
        worker.start()
    # Wait for all threads to finish.
    for worker in workers:
        worker.join()

if __name__ == '__main__':
    import builtins

    def print(*args, **kwargs):
        ''' Redefine print to prevent concurrent printing. '''
        with print.lock:
            builtins.print(*args, **kwargs)
    print.lock = Lock()  # Function attribute.

    n_threads = 3
    main(n_threads, MAXVAL)
    print()
    print('Thread execution is complete!')
    print('final counter value:', DataCampThread.counter)
Sample output:
Starting Thread: Thread #0
Starting Thread: Thread #1
Thread #0 --------> 1
Starting Thread: Thread #2
Thread #1 --------> 2
Thread #2 --------> 3
Thread #1 --------> 4
Thread #0 --------> 5
Thread #2 --------> 6
Thread #2 --------> 7
Thread #1 --------> 8
Thread #0 --------> 9
Thread #2 --------> 10
Execution of Thread: Thread #1 is complete!
Execution of Thread: Thread #0 is complete!
Execution of Thread: Thread #2 is complete!
Thread execution is complete!
final counter value: 10

Multi-threading in Python

I am facing some issues while implementing multi-threading in Python. The issue is very specific to my use case. Having gone through numerous posts on the topic, I deployed the most widely suggested/used method for doing so.
I start by defining my thread class as follows.
class myThread(Thread):
    def __init__(self, graphobj, q):
        Thread.__init__(self)
        self.graphobj = graphobj
        self.q = q

    def run(self):
        improcess(self.graphobj, self.q)
After that, I define the function that does all the required processing:
def improcess(graphobj, q):
    while not exitFlag:
        queueLock.acquire()
        if not q.empty():
            photo_id = q.get()
            queueLock.release()
            # Complete processing
        else:
            queueLock.release()
Now comes the part where I am stuck. I can run the code below exactly as it is without any issues, but if I wrap it in a function like this, it breaks down.
def train_control(graphobj, photo_ids):
    workQueue = Queue(len(photo_ids))
    for i in range(1, 5):
        thread = myThread(graphobj=graphobj, q=workQueue)
        thread.start()
        threads.append(thread)
    queueLock.acquire()
    for photo_id in photo_ids:
        workQueue.put(photo_id)
    queueLock.release()
    while not workQueue.empty():
        pass
    exitFlag = 1
    for t in threads:
        t.join()
By "breaks down" I mean that the threads complete their work but they never stop waiting, i.e. exitFlag is never set to 1. I am unsure how to make this work.
Unfortunately, the design of our systems is such that this piece of code needs to be wrapped in a function that can be invoked by another module, so pulling it out is not really an option.
Looking forward to hearing from experts on this. Thanks in advance.
Edit: I forgot to mention this in the first draft: I globally initialize exitFlag and set its value to 0.
Below is the minimal, verifiable code snippet that I created to capture this problem:
import threading
import Queue

globvar01 = 5
globvar02 = 7
exitFlag = 0
globlist = []
threads = []
queueLock = threading.Lock()
workQueue = Queue.Queue(16)

class myThread(threading.Thread):
    def __init__(self, threadID, q):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.q = q

    def run(self):
        print "Starting thread " + str(self.threadID)
        myfunc(self.threadID, self.q)
        print "Exiting thread " + str(self.threadID)

def myfunc(threadID, q):
    while not exitFlag:
        queueLock.acquire()
        if not workQueue.empty():
            thoughtnum = q.get()
            queueLock.release()
            print "Processing thread " + str(threadID)
            if (thoughtnum < globvar01):
                globlist.append([1,2,3])
            elif (thoughtnum < globvar02):
                globlist.append([2,3,4])
        else:
            queueLock.release()

def controlfunc():
    for i in range(1,5):
        thread = myThread(i, workQueue)
        thread.start()
        threads.append(thread)
    queueLock.acquire()
    for i in range(1,11):
        workQueue.put(i)
    queueLock.release()
    # Wait for queue to empty
    while not workQueue.empty():
        pass
    exitFlag = 1
    # Wait for all threads to complete
    for t in threads:
        t.join()

print "Starting main thread"
controlfunc()
print "Exiting Main Thread"
From your MCVE, the only thing missing is:
while not workQueue.empty():
    pass
global exitFlag  # Need this or `exitFlag` is a local variable only.
exitFlag = 1
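For background, this is the standard Python scoping rule rather than anything threading-specific: assigning to a name inside a function creates a local variable unless the name is declared global. A minimal standalone sketch (the set_local and set_global functions are illustrative, not from the question's code):

flag = 0

def set_local():
    flag = 1  # creates a new local 'flag'; the module-level one is untouched

def set_global():
    global flag  # 'flag' now refers to the module-level name
    flag = 1

set_local()
print(flag)  # prints 0
set_global()
print(flag)  # prints 1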
You could eliminate the queueLock and the exitFlag, however, by using a sentinel value in the Queue to shut down the worker threads, and it eliminates the spin-waiting. Worker threads will sleep on a q.get() and the main thread won't have to spin-wait for an empty queue:
#!python2
from __future__ import print_function
import threading
import Queue

debug = 1
console = threading.Lock()

def tprint(*args, **kwargs):
    if debug:
        name = threading.current_thread().getName()
        with console:
            print('{}: '.format(name), end='')
            print(*args, **kwargs)

globvar01 = 5
globvar02 = 7
globlist = []
threads = []
workQueue = Queue.Queue(16)

class myThread(threading.Thread):
    def __init__(self, threadID, q):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.q = q

    def run(self):
        tprint("Starting thread " + str(self.threadID))
        myfunc(self.threadID, self.q)
        tprint("Exiting thread " + str(self.threadID))

def myfunc(threadID, q):
    while True:
        thoughtnum = q.get()
        tprint("Processing thread " + str(threadID))
        if thoughtnum is None:
            break
        elif thoughtnum < globvar01:
            globlist.append([1,2,3])
        elif thoughtnum < globvar02:
            globlist.append([2,3,4])

def controlfunc():
    for i in range(1,5):
        thread = myThread(i, workQueue)
        thread.start()
        threads.append(thread)
    for i in range(1,11):
        workQueue.put(i)
    # Wait for all threads to complete
    for t in threads:
        workQueue.put(None)
    for t in threads:
        t.join()

tprint("Starting main thread")
controlfunc()
tprint("Exiting Main Thread")
Output:
MainThread: Starting main thread
Thread-1: Starting thread 1
Thread-2: Starting thread 2
Thread-3: Starting thread 3
Thread-4: Starting thread 4
Thread-1: Processing thread 1
Thread-2: Processing thread 2
Thread-3: Processing thread 3
Thread-4: Processing thread 4
Thread-1: Processing thread 1
Thread-2: Processing thread 2
Thread-3: Processing thread 3
Thread-4: Processing thread 4
Thread-1: Processing thread 1
Thread-2: Processing thread 2
Thread-3: Processing thread 3
Thread-4: Processing thread 4
Thread-1: Processing thread 1
Thread-2: Processing thread 2
Thread-3: Exiting thread 3
Thread-4: Exiting thread 4
Thread-1: Exiting thread 1
Thread-2: Exiting thread 2
MainThread: Exiting Main Thread
You need to make sure exitFlag is set to 0 (False) before spawning any threads; otherwise, the loop in improcess() won't do anything and the queue will remain non-empty.
This problem can happen if you have exitFlag as a global and it isn't cleared from a previous run.

Python - Join Multiple Threads With Timeout

I have multiple Process instances running, and I'd like to join all of them together with a timeout parameter. I understand that if no timeout were necessary, I'd be able to write:
for thread in threads:
    thread.join()
One solution I thought of was to use a master process that joins all the threads together, and then to join that master with a timeout. However, I received the following error in Python:
AssertionError: can only join a child process
The code I have is below.
def join_all(threads):
    for thread in threads:
        thread.join()

if __name__ == '__main__':
    for thread in threads:
        thread.start()

    master = multiprocessing.Process(target=join_all, args=(threads,))
    master.start()
    master.join(timeout=60)
You could loop over each thread repeatedly, doing non-blocking checks to see if the thread is done:
import time

def timed_join_all(threads, timeout):
    start = cur_time = time.time()
    while cur_time <= (start + timeout):
        for thread in threads:
            if not thread.is_alive():
                thread.join()
        time.sleep(1)
        cur_time = time.time()

if __name__ == '__main__':
    for thread in threads:
        thread.start()

    timed_join_all(threads, 60)
This answer is initially based on that by dano but has a number of changes.
join_all takes a list of threads and a timeout (in seconds) and attempts to join all of the threads. It does this by making a non-blocking call to Thread.join (by setting the timeout to 0, as join with no arguments will never timeout).
Once all the threads have finished (checked via is_alive() on each of them), the loop exits early.
If some threads are still running by the time the timeout occurs, the function raises a RuntimeError with information about the remaining threads.
import time

def join_all(threads, timeout):
    """
    Args:
        threads: a list of thread objects to join
        timeout: the maximum time to wait for the threads to finish
    Raises:
        RuntimeError: if not all the threads have finished by the timeout
    """
    start = cur_time = time.time()
    while cur_time <= (start + timeout):
        for thread in threads:
            if thread.is_alive():
                thread.join(timeout=0)
        if all(not t.is_alive() for t in threads):
            break
        time.sleep(0.1)
        cur_time = time.time()
    else:
        still_running = [t for t in threads if t.is_alive()]
        num = len(still_running)
        names = [t.name for t in still_running]
        raise RuntimeError('Timeout on {0} threads: {1}'.format(num, names))

if __name__ == '__main__':
    for thread in threads:
        thread.start()

    join_all(threads, 60)
In my usage of this, it was inside a test suite where the threads were dæmonised versions of ExcThread so that if the threads never finished running, it wouldn't matter.
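For anyone unfamiliar with that pattern: a daemon thread does not keep the process alive, so the program can exit even while such a thread is still blocked. A minimal sketch (using a plain threading.Thread rather than ExcThread):

import threading
import time

t = threading.Thread(target=time.sleep, args=(60,))
t.daemon = True  # daemon threads are stopped abruptly when the main thread exits
t.start()
print('main thread exiting; the daemon does not keep the process alive')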
The following code joins each process, waiting a certain amount of time. If the proc returns fast enough, the timeout is reduced, then the next process is joined. If a timeout occurs, an error message is shown and the entire system exits to the caller.
source
import multiprocessing, sys, time

# start three procs that run for differing lengths of time
procs = [
    multiprocessing.Process(
        target=time.sleep, args=[num], name='%d sec' % num,
    )
    for num in [1, 2, 5]
]
for p in procs:
    p.start()
    print p

timeleft = 3.0
print 'Join, timeout after {} seconds'.format(timeleft)
for p in procs:
    orig = time.time()
    print '{}: join, {:.3f} sec left...'.format(p, timeleft)
    p.join(timeleft)
    timeleft -= time.time() - orig
    if timeleft <= 0.:
        sys.exit('timed out!')
Example with timeout:
We start three procs: one sleeps for 1 second, another for 2 seconds, the last for 5 seconds. Then we join them, timing out after 3 seconds; the last proc gets interrupted.
<Process(1 sec, started)>
<Process(2 sec, started)>
<Process(5 sec, started)>
Join, timeout after 3.0 seconds
<Process(1 sec, started)>: join, 3.000 sec left...
<Process(2 sec, started)>: join, 1.982 sec left...
<Process(5 sec, started)>: join, 0.965 sec left...
timed out!
I'm writing this here just to make sure I don't forget it. The principle is the same as in dano's answer, but the code snippet is a bit more Pythonic:
import threading
import time

threads = []
timeout = ...

# create and start the threads
for work in ...:
    thread = threading.Thread(target=worker)
    thread.daemon = True  # without this the thread might outlive its parent
    thread.start()
    threads.append(thread)

# Wait for workers to finish or for timeout
stop_time = time.time() + timeout
while any(t.is_alive() for t in threads) and (time.time() < stop_time):
    time.sleep(0.1)
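On Python 3, the same wait-with-timeout behaviour is available without hand-rolled polling via concurrent.futures. A sketch under the assumption that the work can be expressed as a callable (the task function below is a placeholder):

import concurrent.futures
import time

def task(n):
    # Placeholder for the real work.
    time.sleep(n)
    return n

with concurrent.futures.ThreadPoolExecutor() as pool:
    futures = [pool.submit(task, n) for n in (1, 2, 5)]
    # Blocks for at most 3 seconds, then reports what finished.
    done, not_done = concurrent.futures.wait(futures, timeout=3)
    print('finished:', len(done), 'still running:', len(not_done))
    # Note: leaving the with-block still waits for the remaining tasks;
    # shutdown(wait=False) or cancellation is needed to avoid that.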

How can I execute code in parallel or with multiple threads

The following code works for me, but the problem is that each thread has to wait for the previous one to finish before it starts, or at least that is my perception, because when I put in the sleep(10), each thread waits that long before the next one continues.
What I want is to launch the threads without waiting for the internal code of each one to run.
This is my code (example):
import threading
from time import sleep

class MyThread(threading.Thread):
    def __init__(self, num):
        threading.Thread.__init__(self)
        self.num = num

    def run(self):
        print "I'm the thread", self.num
        sleep(10)
        print "I'm the thread, after 10 sec"

print "I'm the main thread"
for i in range(0, 10):
    t = MyThread(i)
    t.start()
    t.join()
Thanks in advance.
Use two for loops: one to start the threads and one to wait for them:
# create all threads
ts = [MyThread(i) for i in range(10)]

# start all threads
for t in ts:
    t.start()

# wait for all threads
for t in ts:
    t.join()
