Python threading: how to limit the number of active threads? - python

What I need is to have at most 20 active threads, no more. I need a way to guarantee the number never exceeds 20, and at the end of the Python script I need to make sure all of these threads have finished. How do I do that?
My first try was to sleep for 0.1 second when the limit is reached. But that is a pretty arbitrary choice, and from time to time I get the error: Connection pool is full, discarding connection
list_with_file_paths = [...]
slice_20 = list_with_file_paths[:20]
threads = []
for i in slice_20:
    th = threading.Thread(target=process_data, args=(i,))
    print('active count:', threading.active_count())
    if threading.active_count() < 20:
        th.start()
    else:
        time.sleep(0.1)
        th.start()
    threads.append(th)
My second try was to append all threads to one list and .join() them. But joining by itself does nothing to keep the number of threads at 20.
list_with_file_paths = [...]
slice_20 = list_with_file_paths[:20]
threads = []
for j in range(10):  # I take every 20 items from list_with_file_paths, shortened for this example
    for i in slice_20:
        th = threading.Thread(target=process_data, args=(i,))
        print('active count:', threading.active_count())
        if threading.active_count() < 20:
            th.start()
        else:
            time.sleep(0.1)
            th.start()
        threads.append(th)
for thread in threads:
    thread.join()
Is the approach from Limit number of active threads Python the only way to limit the number of active threads?
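For reference, a thread pool addresses both requirements at once: it caps concurrency and waits for all work to finish. A minimal sketch using the standard library's concurrent.futures, assuming process_data and list_with_file_paths from the question:

from concurrent.futures import ThreadPoolExecutor

# At most 20 worker threads exist at any time; leaving the with-block
# waits for every submitted task to complete.
with ThreadPoolExecutor(max_workers=20) as executor:
    executor.map(process_data, list_with_file_paths)

A threading.Semaphore(20) acquired by each worker would achieve the same cap with manually managed threads.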

Related

How can I make 100 threads write numbers increasingly in a single file?

I know I should use .join(). I am already using it, but here is the thing: I run a round of threads (about 100) to perform some action, and after they complete, I start another 100 threads.
The context is that I am trying to check whether x ports on my PC are open using threads. I start 100 threads, and each checks 100 different values and writes its response into a txt file. The problem is that some of the ports are not being written to the file, while others are. When I run the code below to scan the ports from 3000 to 4000, I want my file to have 1000 lines, each specifying whether the port is open or closed, but when I run it, it has around 930. Sometimes more, sometimes less, but never 1000 lines. Check below this code for another thing I tried.
def check_range_ports(ip_host, initial_port, final_port):
    threads = []
    count = initial_port
    loop = 0
    number_of_threads = 100
    while count < final_port:
        if count + number_of_threads > final_port:
            number_of_threads = final_port - count + 1
        for i in range(count, count + number_of_threads):
            t = threading.Thread(target=check_port, args=(ip_host, i))
            t.daemon = True
            threads.append(t)
        for i in range(number_of_threads):
            threads[i].start()
        for i in range(number_of_threads):
            threads[i].join()
        count += number_of_threads
        loop += 1
        threads = []
def check_port(ip_host, port):
    try:
        time.sleep(0.5)
        my_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        my_socket.settimeout(5)
        result = my_socket.connect_ex((ip_host, port))
        with open("./ports.txt", "a+", encoding="utf-8") as f:
            if result == 0:
                f.write(f"Port {port} is open.\n")
            else:
                f.write(f"Port {port} is closed.\n")
        my_socket.close()
    except socket.timeout:
        print("Timeout on socket!")
        sys.exit()
    except socket.gaierror:
        print("Error on the host!")
        sys.exit()
    except KeyboardInterrupt:
        print("Exiting program!")
        sys.exit()
Here is another thing I tried. I created 10 threads, and each of those threads created 100 more subthreads, and each of those subthreads would write a line to a file. It works better than the previous attempt, but I still can't get exactly 1000 lines, which is what I am aiming for.
Is what I'm thinking of doing doable? If yes, how can I achieve it?
def start_threads(ip_host, initial_port, final_port):
    threads = []
    initial_port = 3000
    final_port = 4000
    number_of_ports_to_be_scanned = final_port - initial_port
    ip_host = 'XXX.XXX.X.XX'
    number_of_threads = 0
    if number_of_ports_to_be_scanned / 100 != 0:
        number_of_threads = int(number_of_ports_to_be_scanned / 100) + 1
    else:
        number_of_threads = number_of_ports_to_be_scanned / 100
    count = 0
    for i in range(number_of_threads):
        # if initial_port + count > final_port:
        #     number_of_threads = final_port - number_of_ports_to_be_scanned + 1
        t = threading.Thread(
            target=check_testing_port,
            args=(ip_host, initial_port + count, final_port)
        )
        # t.daemon = True
        t.start()
        threads.append(t)
        count += 100
    # for i in range(number_of_threads):
    #     threads[i].start()
    for i in range(number_of_threads):
        threads[i].join()

def check_testing_port(ip_host, port, final_port):
    sub_threads = []
    number_of_sub_threads = 100
    print(port)
    if port + 100 > final_port:
        number_of_sub_threads = port - final_port
    for i in range(port, port + number_of_sub_threads):
        t = threading.Thread(target=check_port, args=(ip_host, i))
        # t.daemon = True
        t.start()
        sub_threads.append(t)
    # for i in range(number_of_sub_threads):
    #     sub_threads[i].start()
    for i in range(number_of_sub_threads):
        sub_threads[i].join()

def check_port(ip_host, port):
    with open("./testing_ports.txt", "a", encoding="utf-8") as f:
        f.write(f"Port {port}" + "\n")
In check_port you wrote:

with open("ports.txt", "a+") as f:
    f.write(...)

That is insane, in the sense that it is a critical section and you're not holding a lock. Acquire a mutex before messing with the file. Or write thread-specific files, which are subsequently combined into a single file. Better yet, tell all threads to write to a single Queue, and have just one thread read the enqueued results and append them to a text file.
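For the mutex option, here is a minimal sketch (reusing check_port from the question; the module-level Lock is the addition):

import socket
import threading

file_lock = threading.Lock()  # one lock shared by all worker threads

def check_port(ip_host, port):
    my_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    my_socket.settimeout(5)
    result = my_socket.connect_ex((ip_host, port))
    my_socket.close()
    status = "open" if result == 0 else "closed"
    with file_lock:  # critical section: only one thread appends at a time
        with open("./ports.txt", "a", encoding="utf-8") as f:
            f.write(f"Port {port} is {status}.\n")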
You need to properly synchronise the concurrent access to the shared buffer you are writing to (the output file). Only one thread at a time may write to the output file; otherwise you get a data race leading to the data corruption you observed.
You can ensure that only one thread is writing to the shared file by using a mutex, a queue or any other suitable concurrency primitive. Here is an example using a queue:
import threading
import time
from queue import Queue

# Sentinel object to signal end of writing
__END = object()

def check_range_ports(ip_host: str):
    threads: list[threading.Thread] = []
    number_of_threads = 100
    queue = Queue()
    # Start the compute threads
    for i in range(number_of_threads):
        t = threading.Thread(target=check_port, args=(ip_host, i, queue))
        t.start()
        threads.append(t)
    # Start the writer thread
    tw = threading.Thread(target=writer, args=("output.txt", queue))
    tw.start()
    # Wait for all compute threads to finish
    for i in range(number_of_threads):
        threads[i].join()
    # Signal to the writer loop to end
    queue.put(__END)
    # Wait for the writer thread to finish
    tw.join()

def check_port(ip_host: str, port: int, queue: Queue):
    time.sleep(0.5)
    # Enqueue the result to the synchronisation queue
    queue.put((ip_host, port))

def writer(filename: str, queue: Queue):
    with open(filename, "w") as f:
        while True:
            item = queue.get()
            # End the write loop if there is no more item to process
            if item is __END:
                break
            # This write operation is sequential and thread-safe
            ip_host, port = item
            f.write(f"Port {port} of host {ip_host} is open.\n")

def main():
    check_range_ports("127.0.0.1")

if __name__ == "__main__":
    main()

Multi-threading and events exercise in Python

"Write a program using two threads such that one writes even numbers in increasing order and the other odd numbers in incresing order with respect to a certain threashold."
For instance, given 10 I would like to have as output
T1-0
T2-1
T1-2
T2-3
...
T1-8
T2-9
I think an Event object should be used in order to alternate the prints of one thread with the other, but I do not know how to implement it in a way that works, since I think I have not fully grasped the tools for working with threads yet. I leave here my faulty code for the task:
import threading

e = threading.Event()

def even(n):
    if e.isSet() == False:
        for i in range(0, n, +2):
            e.set()
            print(i)
            e.clear()
            e.wait()

def odd(n):
    for i in range(1, n, +2):
        e.wait()
        print(i)
        e.set()

t1 = threading.Thread(target=odd, args=(10,))
t2 = threading.Thread(target=even, args=(10,))
t1.start()
t2.start()
t1.join()
t2.join()
import threading
import time

def thread_function(name, max, y):
    print(name, y)
    time.sleep(0.1)
    for i in range(int(max / 2)):
        time.sleep(0.3)
        y = y + 2
        print(name, y)

n = input("Input an integer \n")
n = int(n)
p = 0
# Create threads
mt1 = threading.Thread(target=thread_function, args=("T-Even", n, p))
mt2 = threading.Thread(target=thread_function, args=("T-Odd", n, p + 1))
# Start threads
mt1.start()
time.sleep(0.1)
mt2.start()
mt1.join()
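Note that the sleep-based interleaving above depends on timing and can drift out of order. For strict alternation driven by Event objects, as the question suggests, a common pattern is two events that hand the turn back and forth; a minimal sketch producing the T1-0, T2-1, ... output:

import threading

even_turn = threading.Event()
odd_turn = threading.Event()
even_turn.set()  # the even thread prints first

def even(n):
    for i in range(0, n, 2):
        even_turn.wait()   # wait for our turn
        print(f"T1-{i}")
        even_turn.clear()
        odd_turn.set()     # hand the turn to the odd thread

def odd(n):
    for i in range(1, n, 2):
        odd_turn.wait()
        print(f"T2-{i}")
        odd_turn.clear()
        even_turn.set()

t1 = threading.Thread(target=even, args=(10,))
t2 = threading.Thread(target=odd, args=(10,))
t1.start()
t2.start()
t1.join()
t2.join()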

Multiprocessing workers dying prematurely in an odd pattern

I have a script that finds all prime numbers using multiprocessing; however, half of the spawned workers die very quickly.
I noticed that the workers that are about to die early show no I/O operations at all in the task manager, while the others run normally. I spawned 8 workers and half of them died.
This is the function given to the workers:
import time
import multiprocessing

def prime(i, processes, maxnum, primes):
    while maxnum >= i:
        f = False
        if i <= 1:
            i += processes
            continue
        else:
            for j in range(2, int(i**0.5) + 1, 1):
                if i % j == 0:
                    i += processes
                    f = True
                    break
        if f:
            continue
        primes.append(i)  # append if prime
        i += processes
        # increment by number of processes, example: p1 (i=1), p2 (i=2),
        # up to i = processes; then all jump by the number of processes
And here is the main function, in which the workers are spawned:
def main():
    start = time.monotonic()
    manager = multiprocessing.Manager()
    primes = manager.list()
    maxnum = 10000000
    processes = 8
    plist = []
    for i in range(1, processes + 1):  # adds each new process to plist
        plist.append(multiprocessing.Process(target=prime, args=(i, processes, maxnum, primes)))
    for p in plist:  # starts the processes in plist and prints out process.pid
        p.start()
        print(p.pid)
    [p.join() for p in plist]
    print("time taken: " + str((time.monotonic() - start) / 60) + ' mins')
    print(plist)
    print(sorted(primes))  # unsure how long the sorting takes

if __name__ == "__main__":  # multiprocessing needs guarding, so all code goes into main
    main()
Here is the state of the processes 5 seconds after starting:
[<Process(Process-2, started)>, <Process(Process-3, stopped)>, <Process(Process-4, started)>, <Process(Process-5, stopped)>,
<Process(Process-6, started)>, <Process(Process-7, stopped)>, <Process(Process-8, started)>, <Process(Process-9, stopped)>]
What I find unusual here is that there is a pattern: every other spawned worker dies.
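A likely explanation, reading the code above (my inference, not confirmed in the thread): each worker starts at its own i and steps by processes = 8, and a step of 8 preserves parity. A worker that starts on an even i therefore only ever tests even numbers, each of which is rejected at j = 2 after a single division, so that worker races through the whole range and exits almost immediately. It has not crashed; it has simply finished, which the Process repr reports as "stopped". The odd-start workers carry all the real trial-division work. A quick sketch showing the parity of the numbers each worker visits:

# Stepping by 8 preserves parity, so workers starting at 2, 4, 6, 8
# only ever see even numbers (shown here for a small range).
processes = 8
for start in range(1, processes + 1):
    visited = list(range(start, 40, processes))
    print(f"worker starting at {start}: {visited}")

Using an odd step, or handing out ranges via a multiprocessing.Pool, would spread the work evenly across the workers.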

How to start threads with a delay in python

I created threads and added a delay in the function, but all the threads execute at the same time. Instead, I want the threads to start one by one. Is that possible?
Below is my code
from _thread import start_new_thread
import time

def mul(n):
    time.sleep(1)
    res = n * n
    return res

while 1:
    m = input("Enter number ")
    t = input("Enter the number of times the function should be executed:")
    try:
        max_threads = int(t)
        for n in range(0, max_threads):
            start_new_thread(mul, (int(m),))
    except:
        print("Please type only digits (0-9)")
        continue
    print(f"Started {max_threads} threads.")
First of all, you added the delay inside the thread function, causing each thread to pause after it has started. So you are starting all the threads one by one without any delay, and each thread then waits 1 second before continuing.
So if you want a specific delay between starts, add it in the main thread, after you start each thread.
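For example, a minimal sketch of the staggered start (assuming mul, m and max_threads from the question; the 0.5-second gap is arbitrary):

import threading
import time

threads = []
for n in range(0, max_threads):
    t = threading.Thread(target=mul, args=(int(m),))
    t.start()
    threads.append(t)
    time.sleep(0.5)  # delay in the main thread before starting the next one
for t in threads:
    t.join()  # make sure everything has finished at the end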
If you want each thread to start only after the previous one has finished, you can do the following:
import threading
...
for n in range(0, max_threads):
    t = threading.Thread(target=mul, args=(m,))
    t.start()
    t.join()  # waits until it is finished

How to create a lock for a specific variable in main while using queues in threads

I am trying to figure out how, in Python, one would declare a specific variable to be locked so that only one thread may access it at a time, to avoid race conditions. I have two threads constantly updating a variable via queues, but I am also updating the variable manually in main. What is the right way to declare that variable a shared resource for all threads, so that only one of them may access it at a time, whether from the running threads or from main?
I wrote a little example code to show what I mean.
import time
from random import randint
from threading import Thread
from queue import Queue

# Add the amount by random number from 1 - 3 every second
def producer(queue, amount):
    while True:
        time.sleep(1)
        amount[0] += randint(1, 3)
        queue.put(amount)

# Subtract the amount by random number from 1 - 3 every second
def consumer(queue, amount):
    while True:
        item = queue.get()
        amount[0] -= randint(1, 3)
        queue.task_done()

amount = [10]
queue = Queue()

t1 = Thread(target=producer, args=(queue, amount,))
t2 = Thread(target=consumer, args=(queue, amount,))
t1.start()
t2.start()

while True:
    n = input("Type a number or q: ")
    if n == 'q':
        break
    else:
        # Here is where I am confused about how to declare the amount a
        # shared resource and lock it in a way that the queues would also
        # adhere to
        amount[0] += int(n)
        print("amount is now: {}".format(amount[0]))

t1.join()
t2.join()
It is important to lock the variable while you are updating its value, so in your case you do indeed require a locking mechanism.
How to lock: create a threading.Lock object, which helps you lock and release a block of code.
acquire: locks the code block. Only one thread can enter this block; other threads wait until the lock is released.
release: releases the acquired lock.
In your case:
import time
from random import randint
from threading import Thread, Lock
from queue import Queue

# Add the amount by random number from 1 - 3 every second
def producer(queue, amount, lock):
    while True:
        time.sleep(1)
        lock.acquire()
        amount[0] += randint(1, 3)
        queue.put(amount)
        lock.release()

# Subtract the amount by random number from 1 - 3 every second
def consumer(queue, amount, lock):
    while True:
        # Block on the queue outside the lock, otherwise the producer
        # could never acquire it and the program would deadlock
        item = queue.get()
        lock.acquire()
        amount[0] -= randint(1, 3)
        queue.task_done()
        lock.release()

amount = [10]
lock = Lock()
queue = Queue()

t1 = Thread(target=producer, args=(queue, amount, lock))
t2 = Thread(target=consumer, args=(queue, amount, lock))
t1.start()
t2.start()
...
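The manual update in main, elided above, needs the same Lock object; a sketch continuing the example (using the with form, which is equivalent to acquire/release):

while True:
    n = input("Type a number or q: ")
    if n == 'q':
        break
    with lock:  # same Lock shared with producer and consumer
        amount[0] += int(n)
        print("amount is now: {}".format(amount[0]))

t1.join()
t2.join()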
