Why does my Python multiprocessing code run very slowly? - python

# multi-processes
from multiprocessing import Process, Queue

class Worker(object):
    def __init__(self, queue):
        self.queue = queue
        self.process_num = 10   # <------------ 10 processes
        self.count = 0

    def start(self):
        for i in range(self.process_num):
            p = Process(target=self.run)
            p.start()
            p.join()

    def run(self):
        while True:
            self.count += 1
            user = self.queue.get()
            # do something not so fast like time.sleep(1)
            print self.count
            if self.queue.empty():
                break
I start the program with Worker(queue).start(), but the output is not as fast as I expected (it seems only one process is running).
Is there a problem in my code?

Yes, you're only running one process at a time: you're waiting for each process to terminate before starting the next one:
def start(self):
    for i in range(self.process_num):
        p = Process(target=self.run)
        p.start()   # <-- starts a new process
        p.join()    # <-- waits for the process to terminate
In other words, you're starting 10 processes, but the second one won't start until the first one terminates and so on.
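For reference, a minimal sketch of start() with the join moved out of the start loop, so all ten processes actually run concurrently (same Worker class as in the question):

def start(self):
    processes = []
    for i in range(self.process_num):
        p = Process(target=self.run)
        p.start()                 # start every process first...
        processes.append(p)
    for p in processes:
        p.join()                  # ...then wait for all of them to finish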
For what you're trying to do, it may be better not to use Process manually and instead use a Process Pool.
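For illustration, a minimal sketch of the Pool approach (Python 3; handle_user and the fixed list of inputs are placeholders for whatever the real run() does with the queue):

from multiprocessing import Pool
import time

def handle_user(user):
    # placeholder for the per-item work, e.g. time.sleep(1)
    time.sleep(1)
    return user

if __name__ == '__main__':
    users = range(100)                 # placeholder input instead of the Queue
    with Pool(processes=10) as pool:   # 10 worker processes, as in the question
        results = pool.map(handle_user, users)
    print(results)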

Related

Python in AWS Lambda: multiprocessing, terminate other processes when one finished

From this other question I learned that AWS Lambda does not support multiprocessing.Pool and multiprocessing.Queue.
I'm also working on Python multiprocessing in AWS Lambda. But my question is: how do we terminate the main process when the first child process returns? (All child processes will return, but with different execution times.)
What I have here:
import time
from multiprocessing import Process, Pipe

class run_func():
    number = 0

    def __init__(self, number):
        self.number = number

    def subrun(self, input, conn):
        # subprocess function with different execution time based on input.
        response = subprocess(input)
        conn.send([input, response])
        conn.close()

    def run(self):
        number = self.number
        processes = []
        parent_connections = []
        for i in range(0, number):
            parent_conn, child_conn = Pipe()
            parent_connections.append(parent_conn)
            process = Process(target=self.subrun, args=(i, child_conn,))
            processes.append(process)
        for process in processes:
            process.start()
        for process in processes:
            process.join()
        results = []
        for parent_connection in parent_connections:
            resp = parent_connection.recv()
            print(resp)
            results.append((resp[0], resp[1]))
        return results

def lambda_handler(event, context):
    starttime = time.time()
    results = []
    work = run_func(int(event['number']))
    results = work.run()
    print("Results : {}".format(results))
    print('Time: {} seconds'.format(time.time() - starttime))
    return results
The current program only returns after all child processes finish (with the for parent_connection in parent_connections loop). But I wonder how to terminate as soon as the first child process finishes (terminating the main process at least; it's OK to leave the other child processes running).
Added:
To be clear, by "first" I mean the first child process to return, which may not be the first one created.
The join() loop is the one that waits for all child processes to complete.
If you break out of it after the first child completes, and forcefully terminate all the other processes, it will work for you:
class run_func():
    number = 0

    def __init__(self, number):
        self.number = number

    def subrun(self, input, conn):
        # subprocess function with different execution time based on input.
        response = subprocess(input)
        conn.send([input, response])
        conn.close()

    def run(self):
        number = self.number
        processes = []
        parent_connections = []
        for i in range(0, number):
            parent_conn, child_conn = Pipe()
            parent_connections.append(parent_conn)
            process = Process(target=self.subrun, args=(i, child_conn,))
            processes.append(process)
        for process in processes:
            process.start()
        for process in processes:
            process.join()
            break
        results = []
        for parent_connection in parent_connections:
            resp = parent_connection.recv()
            print(resp)
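Note that breaking out of the join loop above only waits for the first process that was created, not necessarily the first one to finish. A minimal sketch of an alternative run() (assuming the same class, subrun and Pipe setup as in the question) waits on all parent connections at once and then terminates whatever is still running:

from multiprocessing import Process, Pipe
from multiprocessing.connection import wait

def run(self):
    processes = []
    parent_connections = []
    for i in range(self.number):
        parent_conn, child_conn = Pipe()
        parent_connections.append(parent_conn)
        processes.append(Process(target=self.subrun, args=(i, child_conn)))
    for process in processes:
        process.start()

    # wait() returns as soon as at least one child has sent its result
    ready = wait(parent_connections)
    first = ready[0].recv()

    # forcefully terminate the children that are still running
    for process in processes:
        if process.is_alive():
            process.terminate()

    return [(first[0], first[1])]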

Stopping a python thread while the queue is not empty

I have some code which I have hopefully boiled down to a correct MWE.
My goal is to stop the (multiple) threads once a list inside each thread reaches a specific length.
Unlike in the MWE, it is not known in advance how many iterations are needed:
from queue import Queue
from threading import Thread

def is_even(n):
    return n % 2 == 0

class MT(Thread):
    def __init__(self, queue):
        super().__init__()
        self.queue = queue
        self.output = []

    def run(self):
        while len(self.output) < 4:
            task = self.queue.get()
            if is_even(task):
                self.output.append(task)
                self.queue.task_done()
            else:
                self.queue.task_done()
        print(self.output)
        print('done')

queue = Queue(10)
threads = 1
thr = []
for th in range(threads):
    thr.append(MT(queue))
for th in thr:
    th.start()
for i in range(100):
    queue.put(i)
queue.join()
for th in thr:
    th.join()
print('finished')
This code will never reach finished...
To quote the documentation,
Queue.join()
Blocks until all items in the queue have been gotten and processed.
You have placed 100 items in the queue. The thread pulls 4 items, and completes. There are still 96 unprocessed items, and nobody is going to pull them. Therefore, queue.join() never returns.
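A minimal sketch of one possible fix (just one option, assuming it is acceptable to leave unprocessed items behind and to drop the task_done()/join() bookkeeping): join only the worker threads, and use an unbounded queue so the producer never blocks once the workers have stopped.

from queue import Queue
from threading import Thread

class MT(Thread):
    def __init__(self, queue):
        super().__init__()
        self.queue = queue
        self.output = []

    def run(self):
        # stop once four even numbers have been collected
        while len(self.output) < 4:
            task = self.queue.get()
            if task % 2 == 0:
                self.output.append(task)
        print(self.output, 'done')

queue = Queue()                   # unbounded, so put() never blocks after the worker stops
workers = [MT(queue) for _ in range(1)]
for th in workers:
    th.start()
for i in range(100):
    queue.put(i)
for th in workers:
    th.join()                     # returns once each worker has its four items
print('finished')                 # leftover items simply remain in the queue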

Wait for threads to finish before running them again

I am making a program that controls 2 motors through a Raspberry Pi. I am running Python code and I am wondering how to achieve the following:
Run motor1
Run motor2 simultaneously
Wait for both motors to finish
Run motor1
Run motor2 simultaneously
etc.
What I have done so far is create a Thread subclass and use a queue:
from queue import Queue
from threading import Thread

class Stepper(Thread):
    def __init__(self, stepper):
        Thread.__init__(self)
        self.stepper = stepper
        self.q = Queue(maxsize=0)

    def setPosition(self, pos):
        self.q.put(pos)

    def run(self):
        while not self.q.empty():
            item = self.q.get()
            # run motor and do some stuff

thread_1 = Stepper(myStepper1)
thread_2 = Stepper(myStepper2)
thread_1.start()
thread_2.start()

loop = 10
while(loop):
    thread_1.setPosition(10)
    thread_2.setPosition(30)
    # I want to wait here
    thread_1.setPosition(10)
    thread_2.setPosition(30)
    loop = loop - 1

thread_1.join()
thread_2.join()
thread_1 and thread_2 won't finish at the same time, depending on the number of steps each motor needs to process.
I have tried to use the Lock() functionality but I am not sure how to implement it correctly. I also thought about re-creating the threads, but I'm not sure whether that is the correct solution.
You can actually use a Semaphore:
from threading import Semaphore

class Stepper(Thread):
    def __init__(self, stepper, semaphore):
        Thread.__init__(self)
        self.stepper = stepper
        self.semaphore = semaphore
        self.q = Queue(maxsize=0)  # keep the queue from the original Stepper

    def setPosition(self, pos):
        self.q.put(pos)

    def run(self):
        while not self.q.empty():
            try:
                item = self.q.get()
                # run motor and do some stuff
            finally:
                self.semaphore.release()  # release semaphore when finished one cycle

semaphore = Semaphore(2)
thread_1 = Stepper(myStepper1, semaphore)
thread_2 = Stepper(myStepper2, semaphore)
thread_1.start()
thread_2.start()

loop = 10
for i in range(loop):
    semaphore.acquire()
    semaphore.acquire()
    thread_1.setPosition(10)
    thread_2.setPosition(30)
    semaphore.acquire()
    semaphore.acquire()  # wait until the 2 threads have both released the semaphore
    thread_1.setPosition(10)
    thread_2.setPosition(30)
You can use the thread's join method like so:
thread_1.join() # Wait for thread_1 to finish
thread_2.join() # Same for thread_2
As per the documentation at https://docs.python.org/3/library/threading.html#threading.Thread.join:
A thread can be join()ed many times.
To run threads repeatedly, you will need to reinitialize the Thread object after each run.
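A minimal sketch of that pattern (assuming the Stepper class from the question; positions are queued before start() so run() finds work in the queue, and fresh Thread objects are created each cycle because a thread can only be started once):

loop = 10
for i in range(loop):
    thread_1 = Stepper(myStepper1)
    thread_2 = Stepper(myStepper2)
    thread_1.setPosition(10)
    thread_2.setPosition(30)
    thread_1.start()
    thread_2.start()
    thread_1.join()   # wait for both motors to finish this cycle
    thread_2.join()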

Python port scanner, how to determine optimal number of threads

I have a multi-threaded Python port scanner where each thread in a loop gets something (an IP address/port pair) from a common queue, does some work on it (connects, does a handshake and grabs the server's version) and loops again.
Here's some partial code:
import threading, queue, multiprocessing

class ScanProcess(multiprocessing.Process):
    threads = []

    def __init__(self, squeue, dqueue, count):
        self.squeue = squeue
        self.dqueue = dqueue
        self.count = count
        self._init_threads()
        super(ScanProcess, self).__init__()

    def _init_threads(self):
        self.threads = [ScanThread(self.squeue, self.dqueue) for _ in range(0, self.count)]

    def _start_threads(self):
        for thread in self.threads:
            thread.start()

    def _join_threads(self):
        for thread in self.threads:
            thread.join()

    def run(self):
        self._start_threads()
        self._join_threads()

class ScanThread(threading.Thread):
    def __init__(self, squeue, dqueue):
        self.squeue = squeue
        self.dqueue = dqueue
        super(ScanThread, self).__init__()

    def run(self):
        while not self.squeue.empty():
            try:
                target = self.squeue.get(block=False)
                # do the actual work then put the result in dqueue
            except queue.Empty:
                continue

# how many threads/processes
process_count = 2
thread_count = 10

# load tasks from file or network and fill the queues
squeue = multiprocessing.Queue()
dqueue = multiprocessing.Queue()

# create and start everything
processes = [ScanProcess(squeue, dqueue, thread_count) for _ in range(0, process_count)]
for process in processes:
    process.start()
for process in processes:
    process.join()
# enjoy the show!
The problem I've got is that the number of threads is currently set manually. I'd like to set it automatically to saturate the network connection while not dropping packets, but I have no idea how to begin implementing that. Could anyone summarize how nmap/zmap do it?
Any help is appreciated.

Why are the threads not released after all work is consumed from the Python Queue?

I use a Queue to provide tasks for threads to work on. After all the work in the Queue is done, I see the threads are still alive, whereas I expected them to be released. Here is my code. From the console output you can see that the active thread count keeps increasing after each batch of tasks (on the same queue). How can I release the threads after a batch of work is done?
import threading
import time
from Queue import Queue

class ThreadWorker(threading.Thread):
    def __init__(self, task_queue):
        threading.Thread.__init__(self)
        self.task_queue = task_queue

    def run(self):
        while True:
            work = self.task_queue.get()
            # do some work
            # do_work(work)
            time.sleep(0.1)
            self.task_queue.task_done()

def get_batch_work_done(works):
    task_queue = Queue()
    for _ in range(5):
        t = ThreadWorker(task_queue)
        t.setDaemon(True)
        t.start()
    for work in range(works):
        task_queue.put(work)
    task_queue.join()
    print 'get batch work done'
    print 'active threads count is {}'.format(threading.activeCount())

if __name__ == '__main__':
    for work_number in range(3):
        print 'start with {}'.format(work_number)
        get_batch_work_done(work_number)
Do a non-blocking read (a get() with a short timeout) in the loop and use the Empty exception to terminate:
def run(self):
    try:
        while True:
            work = self.task_queue.get(True, 0.1)  # wait at most 0.1s for an item
            # do some work
            # do_work(work)
            self.task_queue.task_done()            # still needed so task_queue.join() returns
    except Empty:                                  # requires: from Queue import Queue, Empty
        print "goodbye"
