Thread wait communication - Python

I have a Python script which calls plugins on a separate thread. The plugins must execute some commands and wait for a signal from the main script before they can proceed.
I used wait() in the plugin and set() and clear() in the calling script, but it seems that once the plugin thread is started, the main script waits for the thread to complete before continuing. Thus set() and wait() never pair up and the program hangs. I have attached a simplified version of the code.
#!/usr/bin/python
import threading, os, sys, re, imp

e = threading.Event()

class PluginLoader():
    # atexit.register(detective.terminate())

    ## getPlugins - Locate all plugins within the plugin directory
    #  #param self       Class object pointer
    #  #param moduleName Name of module
    def getPlugins(self, moduleName):
        try:
            # Folder in which the plugins are stored
            self.pluginFolder = "/tmp/plugins"
            # Value of the main module given in the json file (the name omitting the .py extension)
            self.mainModule = moduleName[0].strip(" ")
            # Load plugin array
            plugins = []
            possibleplugins = os.listdir(self.pluginFolder)
            # Iterate over plugins to determine applicable plugin
            for i in possibleplugins:
                location = os.path.join(self.pluginFolder, i)
                # Skip if not a directory or the plugin is not in the directory
                if not os.path.isdir(location) or not self.mainModule + ".py" in os.listdir(location):
                    continue
                # Otherwise, find the module
                info = imp.find_module(self.mainModule, [location])
                plugins.append({"name": i, "info": info})
            return plugins
        except OSError:
            print "File or folder not found"

    ## loadPlugin - Load plugin into script
    #  #param  self   Class object pointer
    #  #param  plugin Plugin object pointer
    #  #return Plugin object
    def loadPlugin(self, plugin):
        return imp.load_module(self.mainModule, *plugin["info"])

class Threads:
    def run(self):
        self.tuck()
        raw_input("press enter")
        e.set()

    def tuck(self):
        moduleName = ["hello"]
        for i in pluginLoader.getPlugins(moduleName):
            plugin = pluginLoader.loadPlugin(i)
            threading.Thread(name = "block", target=plugin.run(e)).start()
            e.set()
            e.clear()

pluginLoader = PluginLoader()
t = Threads()
t.run()
The following script must be copied into a subdirectory of /tmp/plugins and named hello.py:
#!/usr/bin/python
from thre import *

class wait:
    flag = ""

    def run(self, e):
        self.flag = e
        print "in main thread"
        self.prints(e)
        e.wait()
        threading.Thread(target=self.prints12).start()

    def prints(self, e):
        for i in xrange(10):
            print "world"

    def prints12(self):
        for i in xrange(10):
            print "Hey"

w = wait()

def run(e):
    w.run(e)

The main problem seems to be here:
threading.Thread(name = "block", target=plugin.run(e)).start()
plugin.run(e) calls the plugin's run method immediately, in the calling thread, and passes its return value as the thread's target; since run blocks on e.wait() before the main script can ever reach its own e.set(), the program deadlocks. This probably should be:
threading.Thread(name = "block", target=plugin.run, args=(e,)).start()
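With that one-line change the Event handshake works as intended. For illustration, here is a minimal, self-contained sketch of the same pattern; plugin_run and the printed messages are stand-ins for the plugin's run method, not the original code:

import threading

e = threading.Event()

def plugin_run(ev):
    # runs in the worker thread: do some work, then wait for the
    # main script's signal before proceeding
    print("plugin: working, now waiting for the signal")
    ev.wait()
    print("plugin: signal received, proceeding")

# pass the function reference and its arguments separately, so that
# Thread calls plugin_run(e) in the new thread rather than here
t = threading.Thread(name="block", target=plugin_run, args=(e,))
t.start()

print("main: free to continue; now signalling the plugin")
e.set()   # wake the waiting plugin thread
t.join()  # wait for the plugin to finish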

Related

Packaging python modules having MultiProcessing Code

I am trying to package my Python project into an executable using PyInstaller. The main module contains code for multiprocessing. When I run the executable, only the lines of code prior to the multiprocessing part get executed, again and again. It neither throws an exception nor exits the program.
Code in main module:
from Framework.ExcelUtility import ExcelUtility
from Framework.TestRunner import TestRunner
import concurrent.futures

class Initiator:
    def __init__(self):
        self.exec_config_dict = {}
        self.test_list = []
        self.test_names = []
        self.current_test_set = []

    def set_first_execution_order(self):
        # Code
        pass

    def set_subsequent_execution_order(self):
        # Code
        pass

    def kick_off_tests(self):
        '''Method to do multiprocess execution'''
        if(__name__=="__main__"):
            with concurrent.futures.ProcessPoolExecutor(max_workers=int(self.exec_config_dict.get('Parallel'))) as executor:
                for test in self.current_test_set:
                    executor.submit(TestRunner().runner, test)  # This line is not being executed from the exe file.

initiator = Initiator()
initiator.get_run_info()
initiator.set_first_execution_order()
initiator.kick_off_tests()
while len(initiator.test_list) > 0:
    initiator.set_subsequent_execution_order()
    try:
        initiator.kick_off_tests()
    except BaseException as exception:
        print(exception)
From the problem definition I'm assuming you are using ms-windows, and that the main module is not named __main__.py.
In that case, multiprocessing has some special guidelines:
Make sure that the main module can be safely imported by a new Python interpreter without causing unintended side effects (such as starting a new process).
and
Instead one should protect the “entry point” of the program by using if __name__ == '__main__'
So, change the last part of your main module like this:
from multiprocessing import freeze_support

# in class Initiator (the __name__ guard moves out of the method):
def kick_off_tests(self):
    '''Method to do multiprocess execution'''
    with concurrent.futures.ProcessPoolExecutor(max_workers=int(self.exec_config_dict.get('Parallel'))) as executor:
        for test in self.current_test_set:
            executor.submit(TestRunner().runner, test)

if __name__ == '__main__':
    freeze_support()
    initiator = Initiator()
    initiator.get_run_info()
    initiator.set_first_execution_order()
    initiator.kick_off_tests()
    while len(initiator.test_list) > 0:
        initiator.set_subsequent_execution_order()
        try:
            initiator.kick_off_tests()
        except BaseException as exception:
            print(exception)

Stop one Python script that is running within another

I have a Python app that starts from a main script, let's say main.py. Since my app is organized into modules, main.py references and imports other .py files within the same directory, which house other functions. As my app runs continuously, it imports such a function from another script; that function is also supposed to run forever until it is explicitly cancelled.
The thing is, how would I cancel that specific script, while leaving its affected variables untouched and the main script/larger app still running?
I do not know how I would go about targeting a specific function to stop its execution.
I use a kill function in my utils to kill any unneeded Python process whose name I know. Note: the following code was tested on Ubuntu Linux and Mac OS machines.
import os
import signal
import subprocess

def get_running_pids(process_name):
    pids = []
    p = subprocess.Popen(['ps', '-A'], stdout=subprocess.PIPE)
    out, err = p.communicate()
    for line in out.splitlines():
        if process_name in line.decode('utf-8'):
            pid = int(line.decode('utf-8').split(None, 1)[0])
            pids.append(pid)
    return pids

def kill_process_with_name(process_name):
    pids = get_running_pids(process_name)
    for pid in pids:
        os.kill(pid, signal.SIGKILL)
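For example, assuming the forever-running script was started as python worker.py (a hypothetical name), the main script could stop it like this:

# SIGKILL every process whose `ps -A` line contains "worker.py";
# note this gives the target script no chance to clean up
kill_process_with_name("worker.py")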
You could set up user-defined, custom exceptions, extending Python's built-in Exception object. Further reading here: Python's User Defined Exceptions.
CustomExceptions.py:

class HaltException(Exception):
    pass
main.py:

from CustomExceptions import HaltException

class Functions():
    def a(self):
        print("hey")
        self.b()
        return "1"

    def b(self):
        print("hello")
        raise HaltException()

def main():
    func_obj = Functions()
    try:
        func_obj.a()
    except HaltException as e:
        pass
    print("Awesome")

main()
Programs may name their own exceptions by creating a new exception
class (see Classes for more about Python classes). Exceptions should
typically be derived from the Exception class, either directly or
indirectly.

How to run a class in a separate thread in Python?

I have two classes in a Python script. One of them is Main() and the second is Loading().

class Main:
    pass

class Loading:
    pass

First, Main() runs and returns a filled dictionary.
Then an instance of Loading() is created, which iterates over all the images and downloads them:

## LOAD IMAGES ##
imageLoader = Loading()
imageLoader.save()

So, the problem is that when I call this script, it creates one main thread that waits for imageLoader = Loading() to finish its work.
As a result, the main thread runs so long that it provokes a 502 server error.
How do I run imageLoader = Loading() in a separate background thread, so as to release the main thread?
What will be launched first in this code:

LOADED_IMAGES = {}
IMAGES_ERRORS = []
IMAGES = {"A": "https://images.aif.ru/009/299/3378e1a1ab2d1c6e6be6d38253dd3632.jpg", "B": "http://static1.repo.aif.ru/1/77/623957/b99ee5f894f38261e4d3778350ffbaae.jpg"}

excel = Excel()
excel.readExcel(file_path, 'Main')
imageLoader = ImageLoader()
Thread(target=imageLoader.run().save()).start()

Does it work line by line, or will the Thread be created immediately?
This is the full code:
class ImageLoader:
    def run(self):
        for article, image in IMAGES.items():
            if image is None or image == '':
                continue
            LOADED_IMAGES[article] = self.loadImage(self.replaceHttpsProtocol(image), '/home/o/oliwin4/jara/public_html/image/catalog/s/')

    def replaceHttpsProtocol(self, url):
        return url.replace("https:", "http:")

    def nameNameGenerate(self):
        return int(round(time.time() * 1000))

    def extention(self, path):
        ext = path.split(".")[-1]
        return '.' + ext if ext else "jpg"

    def save(self):
        for article, image in LOADED_IMAGES.items():
            self.add(article, image)

    def add(self, article, image):
        Products.update(image=image).where(Products.sku == article).execute()

    def loadImage(self, path, path_folder):
        try:
            filename = str(self.nameNameGenerate()) + str(self.extention(path))
            wget.download(url=path, out=path_folder + filename)
            return 'catalog/s/' + filename
        except BaseException as e:
            IMAGES_ERRORS.append(str(e))
Using:

def runOnThread():
    imageLoader = ImageLoader()
    imageLoader.run()
    imageLoader.save()

if __name__ == "__main__":
    Thread(target=runOnThread, daemon=True).start()
You need to identify which line is blocking your code and run it in a separate thread; usually the blocking line is some kind of I/O or an expensive computation.
To do it you can use the threading module.
So, assuming that your blocking line is
imageLoader.save()
try to run it in a separate thread with this code:

from threading import Thread
Thread(target=imageLoader.save).start()
As mentioned, you can use Python's threading module for this. Note, though, that a thread takes a reference to a function; passing target a function call is useless/wrong.
In your case, if you want to both instantiate an object and then run a method on it in a separate thread, you should put these two steps in a function:
def runOnThread():
    imageLoader = Loading()
    imageLoader.save()
Then pass a reference to this function to a new thread, like so (notice there is no ()):
from threading import Thread
Thread(target=runOnThread).start()
If you don't want the main thread to wait for the new thread to finish, you can make it a daemon thread, like so:
Thread(target=runOnThread, daemon=True).start()
A shorter version of all of the above:
from threading import Thread
Thread(target=lambda: Loading().save(), daemon=True).start()

os.chdir between multiple python processes

I have a complex Python pipeline (whose code I can't change) that calls multiple other scripts and other executables. The point is that it takes ages to run over 8000 directories, doing some scientific analyses. So I wrote a simple wrapper (which might not be the most effective, but seems to work) using the multiprocessing module.
from os import path, listdir, mkdir, system
from os.path import join as osjoin, exists, isfile
from GffTools import Gene, Element, Transcript
from GffTools import read as gread, write as gwrite, sort as gsort
from re import match
from multiprocessing import JoinableQueue, Process
from sys import argv, exit

# some absolute paths
inbase = "/.../abfgp_in"
outbase = "/.../abfgp_out"
abfgp_cmd = "python /.../abfgp-2.rev/abfgp.py"
refGff = "/.../B0510_manual_reindexed_noSeq.gff"

# the Queue
Q = JoinableQueue()
i = 0

# define number of processes
try: num_p = int(argv[1])
except ValueError: exit("Wrong CPU argument")

# This is the function calling the abfgp.py script, which in its turn calls a lot of third party software
def abfgp(id_, pid):
    out = osjoin(outbase, id_)
    if not exists(out): mkdir(out)
    # logfile
    log = osjoin(outbase, "log_process_%s" % (pid))
    try:
        # call the script
        system("%s --dna %s --multifasta %s --target %s -o %s -q >>%s" % (abfgp_cmd, osjoin(inbase, id_, id_ + ".dna.fa"), osjoin(inbase, id_, "informants.mfa"), id_, out, log))
    except:
        print "ABFGP FAILED"
        return

# parse the output
def extractGff(id_):
    # code not relevant
    pass

# function called by multiple processes, using the Queue
def run(Q, pid):
    while not Q.empty():
        try:
            d = Q.get()
            print "%s\t=>>\t%s" % (str(i - Q.qsize()), d)
            abfgp(d, pid)
            Q.task_done()
        except KeyboardInterrupt:
            exit("Interrupted Child")

# list of directories
genedirs = [d for d in listdir(inbase)]
genes = gread(refGff)
for d in genedirs:
    i += 1
    indir = osjoin(inbase, d)
    outdir = osjoin(outbase, d)
    Q.put(d)

# this loop creates the multiple processes
procs = []
for pid in range(num_p):
    try:
        p = Process(target=run, args=(Q, pid + 1))
        p.daemon = True
        procs.append(p)
        p.start()
    except KeyboardInterrupt:
        print "Aborting start of child processes"
        for x in procs:
            x.terminate()
        exit("Interrupted")

try:
    for p in procs:
        p.join()
except:
    print "Terminating child processes"
    for x in procs:
        x.terminate()
    exit("Interrupted")

print "Parsing output..."
for d in genedirs: extractGff(d)
Now the problem is that abfgp.py uses the os.chdir function, which seems to disrupt the parallel processing. I get a lot of errors stating that some (input/output) files/directories cannot be found for reading/writing, even though I call the script through os.system(), which I thought would prevent this by spawning separate processes.
How can I work around this chdir interference?
Edit: I might change os.system() to subprocess.Popen(cwd="...") with the right directory. I hope this makes a difference.
Thanks.
Edit 2
Do not use os.system(); use subprocess.call().
system("%s --dna %s --multifasta %s --target %s -o %s -q >>%s" %(abfgp_cmd, osjoin(inbase, id_, id_ +".dna.fa"), osjoin(inbase, id_, "informants.mfa"), id_, out, log))
would translate to
subprocess.call((abfgp_cmd, '--dna', osjoin(inbase, id_, id_ +".dna.fa"), '--multifasta', osjoin(inbase, id_, "informants.mfa"), '--target', id_, '-o', out, '-q')) # without log.
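A minimal sketch combining this with the questioner's own edit: subprocess also accepts a cwd argument to give each child its own starting directory, and the >>log shell redirection can be replaced by the stdout argument. This reuses the names from the question's abfgp() function and is an illustration, not code tested against abfgp.py:

import subprocess

def abfgp(id_, pid):
    out = osjoin(outbase, id_)
    if not exists(out): mkdir(out)
    log = osjoin(outbase, "log_process_%s" % pid)
    with open(log, "a") as logfile:
        # each call() spawns a separate process, so an os.chdir inside
        # abfgp.py only affects that child; cwd sets its starting directory
        subprocess.call(abfgp_cmd.split() +
                        ["--dna", osjoin(inbase, id_, id_ + ".dna.fa"),
                         "--multifasta", osjoin(inbase, id_, "informants.mfa"),
                         "--target", id_, "-o", out, "-q"],
                        cwd=out, stdout=logfile)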
Edit 1
I think the problem is that multiprocessing uses the module names to serialize functions and classes.
This means that if you do import module where module lives in ./module.py, and you then do something like os.chdir('./dir'), you would now need to from .. import module.
The child processes inherit the folder of the parent process. This may be a problem.
Solutions
- Make sure that all modules are imported (in the child processes) before you change the directory.
- Insert the original os.getcwd() into sys.path to enable imports from the original directory. This must be done before any functions are called from the local directory.
- Put all functions that you use inside a directory that can always be imported. site-packages could be such a directory. Then you can do something like import module; module.main() to start what you do.
This is a hack that I use because I know how pickle works. Only use it if the other attempts fail.
The script below prints:

serialized       # the function runD is serialized
string executed  # before the function is loaded, the code is executed
loaded           # now the function run is deserialized
run              # run is called

In your case you would do something like this:

runD = evalBeforeDeserialize('__import__("sys").path.append({})'.format(repr(os.getcwd())), run)
p = Process(target=runD, args=(Q, pid+1))
This is the script:
# functions that you need
class R(object):
def __init__(self, call, *args):
self.ret = (call, args)
def __reduce__(self):
return self.ret
def __call__(self, *args, **kw):
raise NotImplementedError('this should never be called')
class evalBeforeDeserialize(object):
def __init__(self, string, function):
self.function = function
self.string = string
def __reduce__(self):
return R(getattr, tuple, '__getitem__'), \
((R(eval, self.string), self.function), -1)
# code to show how it works
def printing():
print('string executed')
def run():
print('run')
runD = evalBeforeDeserialize('__import__("__main__").printing()', run)
import pickle
s = pickle.dumps(runD)
print('serialized')
run2 = pickle.loads(s)
print('loaded')
run2()
Please report back if these do not work.
You could determine which instance of the os library the unalterable program is using, then create a tailored version of chdir in that library that does what you need: prevent the directory change, log it, whatever. If the tailored behavior needs to apply only to the single program, you can use the inspect module to identify the caller and tailor the behavior for just that caller.
Your options are limited if you truly can't alter the existing program; but if you have the option of altering the libraries it imports, something like this could be a least-invasive way to skirt the undesired behavior.
The usual caveats apply when altering a standard library.
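As an illustration of that idea, here is a minimal sketch that monkey-patches os.chdir to log each attempted change; whether to block or forward the call is a choice, and the message format is purely illustrative:

import os

_original_chdir = os.chdir  # keep a handle to the real implementation

def _patched_chdir(path):
    # log the attempted change; the working directory is left untouched
    print("chdir intercepted: %r (cwd stays %r)" % (path, os.getcwd()))
    # to forward to the real chdir anyway, uncomment the next line:
    # _original_chdir(path)

os.chdir = _patched_chdir  # must run before the unalterable code uses it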

Python, Quickly and Glade, showing stdout in a TextView

I've spent ages looking for a way to do this, and so far I've come up with nothing. :(
I'm trying to make a GUI for a little CLI program that I've made, so I thought using Ubuntu's "Quickly" would be the easiest way. Basically, it appears to use Glade for making the GUI. I know that I need to run my CLI backend in a subprocess and then send the stdout and stderr to a textview, but I can't figure out how to do this.
This is the code that Glade/Quickly created for the dialog box that I want the output to appear in:
from gi.repository import Gtk # pylint: disable=E0611
from onice_lib.helpers import get_builder

import gettext
from gettext import gettext as _
gettext.textdomain('onice')

class BackupDialog(Gtk.Dialog):
    __gtype_name__ = "BackupDialog"

    def __new__(cls):
        """Special static method that's automatically called by Python when
        constructing a new instance of this class.

        Returns a fully instantiated BackupDialog object.
        """
        builder = get_builder('BackupDialog')
        new_object = builder.get_object('backup_dialog')
        new_object.finish_initializing(builder)
        return new_object

    def finish_initializing(self, builder):
        """Called when we're finished initializing.

        finish_initializing should be called after parsing the UI definition
        and creating a BackupDialog object with it in order to finish
        initializing the start of the new BackupDialog instance.
        """
        # Get a reference to the builder and set up the signals.
        self.builder = builder
        self.ui = builder.get_ui(self)
        self.test = False

    def on_btn_cancel_now_clicked(self, widget, data=None):
        # TODO: Send SIGTERM to the subprocess
        self.destroy()

if __name__ == "__main__":
    dialog = BackupDialog()
    dialog.show()
    Gtk.main()
If I put this in the finish_initializing function:

backend_process = subprocess.Popen(["python", <path to backend>], stdout=subprocess.PIPE, shell=False)

then the process starts and runs under another PID, which is what I want. But now, how do I send backend_process.stdout to the TextView? I can write to the textview with:

BackupDialog.ui.backup_output.get_buffer().insert_at_cursor("TEXT")

But I just need to know how to have this be called each time there is a new line of stdout.
But I just need to know how to have this be called each time there is a new line of stdout.
You could use GObject.io_add_watch to monitor the subprocess output or create a separate thread to read from the subprocess.
# read from the subprocess; proc and label are assumed to exist already
def read_data(source, condition):
    line = source.readline()  # might block
    if not line:
        source.close()
        return False  # stop reading
    # update text
    label.set_text('Subprocess output: %r' % (line.strip(),))
    return True  # continue reading

io_id = GObject.io_add_watch(proc.stdout, GObject.IO_IN, read_data)
Or using a thread:
from contextlib import closing
from threading import Thread

# read from the subprocess in a separate thread
def reader_thread(proc, update_text):
    with closing(proc.stdout) as file:
        for line in iter(file.readline, b''):
            # execute update_text() in the GUI thread
            GObject.idle_add(update_text, 'Subprocess output: %r' % (line.strip(),))

t = Thread(target=reader_thread, args=[proc, label.set_text])
t.daemon = True  # exit with the program
t.start()
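To feed the question's TextView rather than a label, the update callback can wrap the insert_at_cursor call from the question. A sketch of how the pieces could connect inside finish_initializing, assuming subprocess and Thread are imported and reader_thread is defined as above; "backend.py" is a hypothetical stand-in for the real backend path:

backend_process = subprocess.Popen(
    ["python", "backend.py"],  # hypothetical backend path
    stdout=subprocess.PIPE, shell=False)

buffer = self.ui.backup_output.get_buffer()

def update_text(text):
    # runs in the GUI thread, because reader_thread
    # dispatches it via GObject.idle_add
    buffer.insert_at_cursor(text + "\n")

t = Thread(target=reader_thread, args=[backend_process, update_text])
t.daemon = True
t.start()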
Complete code examples.
