Can someone please help me understand the constraints of using threading from within a Python process?
I have attached a minimal working example of what I am trying to achieve. My use case requires that I bring up several processes, and within each process I have two threads that need to communicate. However, even in the very simplified example below I seem to be running into deadlock/contention, and it's not at all clear what is going wrong.
import multiprocessing
from threading import Thread
import logging
import time
import sys

def print_all_the_things(char, num):
    try:
        while True:
            sys.stdout.write(char + str(num))
    except Exception:
        logging.exception("Something went wrong")

class MyProcess(multiprocessing.Process):
    def __init__(self, num):
        super(MyProcess, self).__init__()
        self.num = num

    def run(self):
        self.thread1 = Thread(target=print_all_the_things, args=("a", self.num))
        self.thread2 = Thread(target=print_all_the_things, args=("b", self.num))
        self.thread1.start()
        self.thread2.start()

procs = {}
for a in range(2):
    procs[a] = MyProcess(a)
    procs[a].start()

time.sleep(5)

for a in range(2):
    procs[a].join()
The expected output is a mishmash of 'a', 'b', '0' and '1' on stdout. However, the program very quickly deadlocks:
$ python mwe.py
a0a0a0a0a0a0b0b0a0a0b0b0b0b0b0b0b0b0b0b0a0a0a0a0a0a0a0a1a1a2a2a2a2a2b2b2a2
I should point out that changing MyProcess to inherit from Thread results in a working example.
What am I doing wrong?
The two processes are started, they start their threads, and then they should exit, since there are no more instructions in run().
But the threads remain in a kind of zombie state, because the daemon flag has not been set (see the Python documentation about this), preventing the two processes from terminating properly.
Make the run() method not finish right after the threads are started; for example, you can wait on an exit condition:
class MyProcess(multiprocessing.Process):
    def __init__(self, num, exit_cond):                 ### new code
        super(MyProcess, self).__init__()
        self.num = num
        self.exit_cond = exit_cond                      ### new code

    def run(self):
        self.thread1 = Thread(target=print_all_the_things, args=("a", self.num))
        self.thread2 = Thread(target=print_all_the_things, args=("b", self.num))
        self.thread1.daemon = True                      ### new code
        self.thread2.daemon = True                      ### new code
        self.thread1.start()
        self.thread2.start()
        self.exit_cond.wait()                           ### new code

procs = {}
exit_cond = multiprocessing.Event()                     ### new code
for a in range(2):
    procs[a] = MyProcess(a, exit_cond)
    procs[a].start()

time.sleep(5)
exit_cond.set()                                         ### new code

for a in range(2):
    procs[a].join()
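An alternative, if you prefer not to daemonize the threads: let the worker loop itself poll the shared event, so the threads finish on their own and run() can simply join them. A minimal sketch, assuming the same main block as above (with exit_cond.set() after 5 seconds):

import multiprocessing
import sys
from threading import Thread

def print_until_stopped(char, num, stop_event):
    # run until the main process sets the shared event
    while not stop_event.is_set():
        sys.stdout.write(char + str(num))

class MyProcess(multiprocessing.Process):
    def __init__(self, num, stop_event):
        super(MyProcess, self).__init__()
        self.num = num
        self.stop_event = stop_event

    def run(self):
        threads = [Thread(target=print_until_stopped, args=(c, self.num, self.stop_event))
                   for c in ("a", "b")]
        for t in threads:
            t.start()
        for t in threads:
            t.join()  # returns once stop_event is set and the loops exit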
I am trying to implement a system where:
Actors generate data
Replay is a class that manages the data generated by the Actors (in theory it does much more than in the code below, but I kept it simple for posting it here)
Learner uses the data of the Replay class (and sometimes updates some data of Replay)
To implement this, I append the data generated by the Actors to a multiprocessing.Queue, and I use a process to push the data to my Replay. I used a multiprocessing.BaseManager to share the Replay.
This is my implementation (the code is working):
import time
import random
from collections import deque
import torch.multiprocessing as mp
from multiprocessing.managers import BaseManager

T = 20
B = 5
REPLAY_MINIMUM_SIZE = 10
REPLAY_MAXIMUM_SIZE = 100

class Actor:
    def __init__(self, global_buffer, rank):
        self.rank = rank
        self.local_buffer = []
        self.global_buffer = global_buffer

    def run(self, num_steps):
        for step in range(num_steps):
            data = f'{self.rank}_{step}'
            self.local_buffer.append(data)
            if len(self.local_buffer) >= B:
                self.global_buffer.put(self.local_buffer)
                self.local_buffer = []

class Learner:
    def __init__(self, replay):
        self.replay = replay

    def run(self, num_steps):
        while self.replay.size() <= REPLAY_MINIMUM_SIZE:
            time.sleep(0.1)
        for step in range(num_steps):
            batch = self.replay.sample(B)
            print(batch)

class Replay:
    def __init__(self, capacity):
        self.memory = deque(maxlen=capacity)

    def push(self, experiences):
        self.memory.extend(experiences)

    def sample(self, n):
        return random.sample(self.memory, n)

    def size(self):
        return len(self.memory)

def send_data_to_replay(global_buffer, replay):
    while True:
        if not global_buffer.empty():
            batch = global_buffer.get()
            replay.push(batch)

if __name__ == '__main__':
    num_actors = 2

    global_buffer = mp.Queue()

    BaseManager.register("ReplayMemory", Replay)
    Manager = BaseManager()
    Manager.start()
    replay = Manager.ReplayMemory(REPLAY_MAXIMUM_SIZE)

    learner = Learner(replay)
    learner_process = mp.Process(target=learner.run, args=(T,))
    learner_process.start()

    actor_processes = []
    for rank in range(num_actors):
        p = mp.Process(target=Actor(global_buffer, rank).run, args=(T,))
        p.start()
        actor_processes.append(p)

    replay_process = mp.Process(target=send_data_to_replay, args=(global_buffer, replay,))
    replay_process.start()

    learner_process.join()
    [actor_process.join() for actor_process in actor_processes]
    replay_process.join()
I followed several tutorials and read websites related to multiprocessing, but I am very new to distributed computing, and I am not sure if what I am doing is right.
I wanted to know if there is anything in my code that does not follow good practices. Moreover, when I launch the program, the different processes do not terminate, and I am not sure why or how to handle it.
Any feedback would be appreciated!
I find that when working with multiprocessing it is best to have a Queue for each running process. When you are ready to close the application, you can send an exit message (or poison pill) to each queue and close each process cleanly.
When you launch a child process, pass the parent queue and the child queue to the new process through inheritance.
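As an illustration of the poison-pill idea (a sketch only; the STOP sentinel and worker function are made up for this example, not taken from the code above):

import multiprocessing

STOP = None  # the "poison pill" sentinel

def worker(q):
    while True:
        item = q.get()        # blocks until something arrives
        if item is STOP:      # sentinel received: shut down cleanly
            break
        print("processing", item)

if __name__ == "__main__":
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=worker, args=(q,))
    p.start()
    for i in range(3):
        q.put(i)
    q.put(STOP)               # tell the worker to exit
    p.join()                  # returns once the worker has drained the queue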
I have a class (MyClass) which contains a queue (self.msg_queue) of actions that need to be run and I have multiple sources of input that can add tasks to the queue.
Right now I have three functions that I want to run concurrently:
MyClass.get_input_from_user()
Creates a window in tkinter that has the user fill out information; when the user presses submit, it pushes that message onto the queue.
MyClass.get_input_from_server()
Checks the server for a message, reads the message, and then puts it onto the queue. This method uses functions from MyClass's parent class.
MyClass.execute_next_item_on_the_queue()
Pops a message off of the queue and then acts upon it. What happens depends on the message, but each message corresponds to some method in MyClass or its parent, which gets run according to a big decision tree.
Process description:
After the class has joined the network, I have it spawn three threads (one for each of the above functions). Each threaded function adds items to the queue with the syntax "self.msg_queue.put(message)" and removes items from the queue with "self.msg_queue.get_nowait()".
Problem description:
The issue I am having is that each thread seems to be modifying its own queue object (they are not sharing the queue, msg_queue, of the class of which the functions are all members).
I am not familiar enough with multiprocessing to know which error messages are important; however, it states that it cannot pickle a weakref object (with no indication of which object is the weakref), and that within the queue.put() call the line "self._sem.acquire(block, timeout)" yields a '[WinError 5] Access is denied' error. Would it be safe to assume that this failure is the queue's reference not copying over properly?
[I am using Python 3.7.2 and the multiprocessing package's Process and Queue.]
[I have seen multiple Q&As about having threads shuttle information between classes: create a master harness that generates a queue, and then pass that queue as an argument to each thread. If the functions didn't have to use other functions from MyClass, I could see adapting this strategy by having those functions take in a queue and use a local variable rather than class variables.]
[I am fairly confident that this error is not the result of passing my queue to the tkinter object, as my unit tests of how my GUI modifies its caller's queue work fine.]
Below is a minimal reproducible example for the queue's error:
from multiprocessing import Queue
from multiprocessing import Process
import queue
import time

class MyTest:
    def __init__(self):
        self.my_q = Queue()
        self.counter = 0

    def input_function_A(self):
        while True:
            self.my_q.put(self.counter)
            self.counter = self.counter + 1
            time.sleep(0.2)

    def input_function_B(self):
        while True:
            self.counter = 0
            self.my_q.put(self.counter)
            time.sleep(1)

    def output_function(self):
        while True:
            try:
                var = self.my_q.get_nowait()
            except queue.Empty:
                var = -1
            except:
                break
            print(var)
            time.sleep(1)

    def run(self):
        process_A = Process(target=self.input_function_A)
        process_B = Process(target=self.input_function_B)
        process_C = Process(target=self.output_function)

        process_A.start()
        process_B.start()
        process_C.start()

        # without this it generates the WinError:
        # with this it still behaves as if the two input functions do not modify the queue
        process_C.join()

if __name__ == '__main__':
    test = MyTest()
    test.run()
Indeed, these are not "threads" - these are "processes". If you were using multithreading rather than multiprocessing, the self.my_q instance would be the same object, placed at the same memory address in the same process.
multiprocessing instead forks (or, on Windows, spawns) a new process, and any data in the original process (the one executing the "run" call) is duplicated when it is used - so each subprocess sees its own "Queue" instance, unrelated to the others.
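To see the duplication concretely, here is a tiny sketch (illustrative, not from the question): the child process mutates its own copy of a module-level list, and the parent never observes the change.

import multiprocessing

data = []

def child():
    data.append("from child")
    print("child sees:", data)    # ['from child']

if __name__ == "__main__":
    p = multiprocessing.Process(target=child)
    p.start()
    p.join()
    print("parent sees:", data)   # [] - the child's change is invisible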
The correct way to have various processes share a multiprocessing.Queue object is to pass it as a parameter to the target methods. The simplest way to reorganize your code so that it works is thus:
from multiprocessing import Queue
from multiprocessing import Process
import queue
import time

class MyTest:
    def __init__(self):
        self.my_q = Queue()
        self.counter = 0

    # the parameter is named "q" rather than "queue" so it does not
    # shadow the queue module (needed for queue.Empty below)
    def input_function_A(self, q):
        while True:
            q.put(self.counter)
            self.counter = self.counter + 1
            time.sleep(0.2)

    def input_function_B(self, q):
        while True:
            self.counter = 0
            q.put(self.counter)
            time.sleep(1)

    def output_function(self, q):
        while True:
            try:
                var = q.get_nowait()
            except queue.Empty:
                var = -1
            except:
                break
            print(var)
            time.sleep(1)

    def run(self):
        # the one Queue instance is passed explicitly to each child process
        process_A = Process(target=self.input_function_A, args=(self.my_q,))
        process_B = Process(target=self.input_function_B, args=(self.my_q,))
        process_C = Process(target=self.output_function, args=(self.my_q,))

        process_A.start()
        process_B.start()
        process_C.start()

        # keep the main process alive while the workers run
        process_C.join()

if __name__ == '__main__':
    test = MyTest()
    test.run()
As you can see, since your class is not actually sharing any data through the instance's attributes, this "class" design does not make much sense for your application, except for grouping the different workers in the same code block.
It would be possible to have a magic multiprocess class with an internal method that starts the worker methods and shares the Queue instance - so if you have a lot of those in a project, there would be a lot less boilerplate.
Something along these lines:
from multiprocessing import Queue
from multiprocessing import Process
import queue
import time

class MPWorkerBase:
    def __init__(self, *args, **kw):
        self.queue = None
        self.is_parent_process = False
        self.is_child_process = False
        self.processes = []
        # ensure this can be used as a collaborative mixin
        super().__init__(*args, **kw)

    def run(self):
        if self.is_parent_process or self.is_child_process:
            # workers already initialized
            return
        self.queue = Queue()
        processes = []
        cls = self.__class__
        for name in dir(cls):
            method = getattr(cls, name)
            if callable(method) and getattr(method, "_MP_worker", False):
                process = Process(target=self._start_worker, args=(self.queue, name))
                processes.append(process)
                process.start()
        # Setting these attributes only after the workers have been spawned
        # ensures the child processes have the initial values for them.
        self.is_parent_process = True
        self.processes = processes

    def _start_worker(self, queue, method_name):
        # this method is called in a new spawned process - attribute
        # changes here no longer reflect attributes on the
        # object in the initial process

        # overwrite queue in this process with the queue object sent over the wire:
        self.queue = queue
        self.is_child_process = True
        # call the worker method
        getattr(self, method_name)()

    def __del__(self):
        for process in self.processes:
            process.join()

def worker(func):
    """decorator to mark a method as a worker that should
    run in its own subprocess
    """
    func._MP_worker = True
    return func

class MyTest(MPWorkerBase):
    def __init__(self):
        super().__init__()
        self.counter = 0

    @worker
    def input_function_A(self):
        while True:
            self.queue.put(self.counter)
            self.counter = self.counter + 1
            time.sleep(0.2)

    @worker
    def input_function_B(self):
        while True:
            self.counter = 0
            self.queue.put(self.counter)
            time.sleep(1)

    @worker
    def output_function(self):
        while True:
            try:
                var = self.queue.get_nowait()
            except queue.Empty:
                var = -1
            except:
                break
            print(var)
            time.sleep(1)

if __name__ == '__main__':
    test = MyTest()
    test.run()
I have been writing a program which runs a remote script on a server. I need to show the progress with a bar, but somehow when I run my code the GUI starts to freeze. I have used QThread and SIGNAL, but unfortunately couldn't succeed.
Here is my code below:
class dumpThread(QThread):
    def __init__(self):
        QThread.__init__(self)

    def __del__(self):
        self.wait()

    def sendEstablismentCommands(self, connection):
        # Commands are sent sequentially with proper delay-timers #
        connection.sendShell("telnet localhost 21000")
        time.sleep(0.5)
        connection.sendShell("admin")
        time.sleep(0.5)
        connection.sendShell("admin")
        time.sleep(0.5)
        connection.sendShell("cd imdb")
        time.sleep(0.5)
        connection.sendShell("dump subscriber")
        command = input('$ ')

    def run(self):
        # your logic here
        # self.emit(QtCore.SIGNAL('THREAD_VALUE'), maxVal)
        self.sendEstablismentCommands(connection)

class progressThread(QThread):
    def __init__(self):
        QThread.__init__(self)

    def __del__(self):
        self.wait()

    def run(self):
        # your logic here
        while 1:
            maxVal = 100
            self.emit(SIGNAL('PROGRESS'), maxVal)

class Main(QtGui.QMainWindow):
    def __init__(self):
        QtGui.QMainWindow.__init__(self)
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        self.ui.connectButton.clicked.connect(self.connectToSESM)

    def connectToSESM(self):
        ## Function called when pressing the connect button; input is taken from edit boxes. ##
        ## dumpThread() has been designed as a worker thread separate from the GUI. ##

        # Connection data are taken from "Edit Boxes"
        # username has been set as hardcoded

        ### Values Should Be Defined As Global ###
        username = "ntappadm"
        password = self.ui.passwordEdit.text()
        ipAddress = self.ui.ipEdit.text()

        # Connection has been established through the paramiko shell library
        global connection
        connection = pr.ssh(ipAddress, username, password)
        connection.openShell()

        pyqtRemoveInputHook()  # To remove unnecessary items from the console

        global get_thread
        get_thread = dumpThread()  # Run thread - Dump Subscriber

        self.progress_thread = progressThread()
        self.progress_thread.start()
        self.connect(self.progress_thread, SIGNAL('PROGRESS'), self.updateProgressBar)

        get_thread.start()

    def updateProgressBar(self, maxVal):
        for i in range(maxVal):
            self.ui.progressBar.setValue(self.ui.progressBar.value() + 1)
            time.sleep(1)
            maxVal = maxVal - 1
            if maxVal == 0:
                self.ui.progressBar.setValue(100)

    def parseSubscriberList(self):
        parsing = reParser()

    def done(self):
        QtGui.QMessageBox.information(self, "Done!", "Done fetching posts!")

if __name__ == "__main__":
    app = QtGui.QApplication(sys.argv)
    main = Main()
    main.show()
    sys.exit(app.exec_())
I am expecting the updateProgressBar method to be called via the SIGNAL, so the work goes through a separate thread. I couldn't find what I am missing.
Thanks for any help.
There are really two problems. One thing I have noticed is that Python threads are greedy if they are not used for IO operations like reading from a serial port. If you tell a thread to run a calculation or something that is not IO related, the thread will take up all of the processing and doesn't like to let the main thread / event loop run. The second problem is that signals are slow ... very slow. I've noticed that if you emit a signal from a thread and do it very fast, it can drastically slow down a program.
So at the heart of the issue, the thread is taking up all of the time, and you are emitting a signal very, very fast, which will cause slowdowns.
For clarity and ease of use I would use new-style signals and slots.
http://pyqt.sourceforge.net/Docs/PyQt4/new_style_signals_slots.html
class progressThread(QThread):
    progress_update = QtCore.Signal(int)  # or pyqtSignal(int)

    def __init__(self):
        QThread.__init__(self)

    def __del__(self):
        self.wait()

    def run(self):
        # your logic here
        while 1:
            maxVal = 100
            self.progress_update.emit(maxVal)  # self.emit(SIGNAL('PROGRESS'), maxVal)
            # Tell the thread to sleep for 1 second and let other things run
            time.sleep(1)
To connect to the new-style signal:
...
self.progress_thread.start()
self.progress_thread.progress_update.connect(self.updateProgressBar)  # self.connect(self.progress_thread, SIGNAL('PROGRESS'), self.updateProgressBar)
...
EDIT
Sorry, there is another problem. When a signal calls a function, you cannot stay in that function forever. That function is not running in a separate thread; it is running on the main event loop, and the main event loop cannot run again until you exit that function.
updateProgressBar sleeps for 1 second on every loop iteration, and the hanging comes from staying inside this function.
def updateProgressBar(self, maxVal):
    for i in range(maxVal):
        self.ui.progressBar.setValue(self.ui.progressBar.value() + 1)
        time.sleep(1)
        maxVal = maxVal - 1
        if maxVal == 0:
            self.ui.progressBar.setValue(100)
It would be better to write the progress bar update like this:
class progressThread(QThread):
    progress_update = QtCore.Signal(int)  # or pyqtSignal(int)

    def __init__(self):
        QThread.__init__(self)

    def __del__(self):
        self.wait()

    def run(self):
        # your logic here
        while 1:
            maxVal = 1  # NOTE THIS CHANGED to 1, since updateProgressBar was updating the value by 1 every time
            self.progress_update.emit(maxVal)  # self.emit(SIGNAL('PROGRESS'), maxVal)
            # Tell the thread to sleep for 1 second and let other things run
            time.sleep(1)

def updateProgressBar(self, maxVal):
    self.ui.progressBar.setValue(self.ui.progressBar.value() + maxVal)
    if maxVal == 0:
        self.ui.progressBar.setValue(100)
I have a Python GUI program that needs to do the same task but with several threads. The problem is that I call the threads but they don't execute in parallel, only sequentially: the first one executes and ends, then the second one, and so on. I want them to start independently.
The main components are:
1. Menu (view)
2. ProcesStarter (controller)
3. Process (controller)
The Menu is where you click the "Start" button, which calls a function on the ProcesStarter.
The ProcesStarter creates Process objects and threads, and starts all the threads in a for-loop.
Menu:
class VotingFrame(BaseFrame):
    def create_widgets(self):
        self.start_process = tk.Button(root, text="Start Process", command=lambda: self.start_process())
        self.start_process.grid(row=3, column=0, sticky=tk.W)

    def start_process(self):
        procesor = XProcesStarter()
        procesor_thread = Thread(target=procesor.start_process())
        procesor_thread.start()
ProcesStarter:
class XProcesStarter:
    def start_process(self):
        print "starting new process..."
        # thread count
        thread_count = self.get_thread_count()
        # initialize Process objects with data, and start threads
        for i in range(thread_count):
            vote_process = XProcess(self.get_proxy_list(), self.get_url())
            t = Thread(target=vote_process.start_process())
            t.start()
Process:
class XProcess():
    def __init__(self, proxy_list, url, browser_show=False):
        # init code
        pass

    def start_process(self):
        # code for process
        pass
When I press the GUI button for "Start Process", the GUI is locked until both threads finish execution.
The idea is that the threads should work in the background, in parallel.
You call procesor.start_process() immediately when specifying it as the target of the Thread:
# use this
procesor_thread = Thread(target=procesor.start_process)

# not this
procesor_thread = Thread(target=procesor.start_process())
# this is called right away ^
If you call it right away, it returns None, which is a valid target for Thread (it just does nothing), which is why everything happens sequentially: the threads are not doing anything at all.
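To make the difference concrete, a quick sketch (the work function is made up for illustration):

from threading import Thread
import time

def work():
    time.sleep(1)
    print("done")

# Wrong: work() runs here, in the calling thread; Thread receives its
# return value (None) as the target, so the new thread does nothing.
t = Thread(target=work())
t.start()

# Right: the function object itself is handed over and runs in the new thread.
t = Thread(target=work)
t.start()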
One way to use a class as the target of a thread is to use the class itself as the target and the arguments to its constructor as args.
from threading import Thread
from time import sleep
from random import randint

class XProcesStarter:
    def __init__(self, thread_count):
        print("starting new process...")
        self._i = 0
        for i in range(thread_count):
            t = Thread(
                target=XProcess,
                args=(self.get_proxy_list(), self.get_url())
            )
            t.start()

    def get_proxy_list(self):
        self._i += 1
        return "Proxy list #%s" % self._i

    def get_url(self):
        self._i += 1
        return "URL #%d" % self._i

class XProcess():
    def __init__(self, proxy_list, url, browser_show=False):
        r = 0.001 * randint(1, 5000)
        sleep(r)
        print(proxy_list)
        print(url)

def main():
    t = Thread(target=XProcesStarter, args=(4,))
    t.start()

if __name__ == '__main__':
    main()
This code runs in both Python 2 and Python 3.
The reason this works is that the target of a Thread object must be a callable (search for "callable" and "__call__" in the Python documentation for a complete explanation).
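For completeness, a small sketch of that (the Worker class here is illustrative): since any object with a __call__ method is a callable, a pre-configured instance can be passed as the target directly.

from threading import Thread

class Worker:
    def __init__(self, name):
        self.name = name

    def __call__(self):              # makes instances of Worker callable
        print("running", self.name)

t = Thread(target=Worker("job-1"))   # pass the instance, do not call it
t.start()
t.join()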
Edit: The other way has been explained in other people's answers (see Tadhg McDonald-Jensen's).
I think your issue is that in both places where you start threads, you are actually calling the method you want to pass as the target of the thread. That runs its code in the main thread (and tries to start the new thread on the return value, if any, once it's done).
Try:
procesor_thread = Thread(target=procesor.start_process) # no () after start_process
And:
t = Thread(target=vote_process.start_process) # no () here either
I'm using a library which heavily uses I/O. For that reason, calls to that library can take very long (more than 5 seconds).
Using the library directly inside a UI is not a good idea because it will freeze.
For that reason I outsourced the library calls to a thread queue, as shown in this example: Python threads: communication and stopping
Nevertheless, I'm not very happy with that solution, since it has a major drawback: I cannot really communicate with the UI.
Every library command returns a message, which can be either an error message or some computational result. How would I get this?
Consider a library call do_test(foo):
def do_test(foo):
    time.sleep(10)
    return random.random() * foo

def ui_btn_click():
    threaded_queue.put((do_test, 42))
    # Now how to display the result without freezing the UI?
Can someone give me advice on how to realize such a pattern?
Edit:
Here is a minimal example:
import os, time, random
import threading, queue

CMD_FOO = 1
CMD_BAR = 2

class ThreadedQueue(threading.Thread):
    def __init__(self):
        super().__init__()
        self.in_queue = queue.Queue()
        self.out_queue = queue.Queue()
        self.__stoprequest = threading.Event()

    def run(self):
        while not self.__stoprequest.is_set():
            (cmd, arg) = self.in_queue.get(True)
            if cmd == CMD_FOO:
                ret = self.handle_foo(arg)
            elif cmd == CMD_BAR:
                ret = self.handle_bar(arg)
            else:
                print("Unsupported cmd {0}".format(cmd))
            self.out_queue.put(ret)
            self.in_queue.task_done()

    def handle_foo(self, arg):
        print("start handle foo")
        time.sleep(10)
        return random.random() * arg

    def handle_bar(self, arg):
        print("start handle bar")
        time.sleep(2)
        return (random.random() * arg, 2 * arg)

if __name__ == "__main__":
    print("START")
    t = ThreadedQueue()
    t.start()
    t.in_queue.put((CMD_FOO, 10))
    t.in_queue.put((CMD_BAR, 10))
    print("Waiting")
    while True:
        x = t.out_queue.get(True)
        t.out_queue.task_done()
        print(x)
I personally use PySide, but I don't want to make this library depend on PySide or any other UI-related library.
I thought a bit about my implementation. The conclusion is that I start another thread to pick up the results from the queue:
class ReceiveThread(threading.Thread):
    """
    Processes the output queue and calls a callback for each message
    """
    def __init__(self, queue, callback):
        super().__init__()
        self.__queue = queue
        self.__callback = callback
        self.__stoprequest = threading.Event()
        self.start()

    def run(self):
        while not self.__stoprequest.is_set():
            ret = self.__queue.get(True)
            self.__callback(ret)
            self.__queue.task_done()
The given callback from a UI or elsewhere is called with every result from the queue.
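For example, wiring it up to the ThreadedQueue from the minimal example above might look like this (a sketch; print stands in for a real UI update hook):

if __name__ == "__main__":
    t = ThreadedQueue()
    t.start()

    def on_result(ret):
        # in a real UI this would hand ret over to the UI event loop
        print("got result:", ret)

    # ReceiveThread starts itself in __init__
    receiver = ReceiveThread(t.out_queue, on_result)

    t.in_queue.put((CMD_FOO, 10))
    t.in_queue.put((CMD_BAR, 10))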