Mocking an I/O event in Python

My code listens for file changes in a folder, in class A. When a change occurs, I trigger a function of class B, which is a field in class A.
class A:
    def __init__(self, b):
        ...
        self.handler = b

    def run(self):
        # listen for changes in a folder using watchdog.observers.Observer
        self.observer.schedule(self.handler, self.input_directory, recursive=True)
        self.observer.start()
        try:
            while not self.stopped:
                time.sleep(self.scanning_frequency)
        except:
            self.observer.stop()
        self.observer.join()

class B(FileSystemEventHandler):
    ...
    def on_any_event(self, event):
        # a change occurred and is handled here
Now what I want to test is that when a file is copied to this folder, then on_any_event should be triggered. This is how I tried to do that:
def test_file_watcher(self):
    # arrange
    b = B()
    a = A(b)
    a.handler.on_any_event = MagicMock()
    shutil.copy(...)  # copy a file to the watched folder
    p1 = Process(target=a.run)
    p1.start()
    time.sleep(a.scanning_frequency + 1)  # folder scanning frequency + 1 second
    a.stop()  # stops watching the folder
    assert a.handler.on_any_event.called
    p1.join()
However, this assertion turns out to be false every time. Where am I going wrong exactly? Also, would it be possible to achieve this by mocking B completely?
Edit: I think the reason could be that I am running a.run in a different process, and therefore a.handler.on_any_event.called is always false in the parent. But I couldn't figure out how to solve this.

Workaround for your test in a multiprocessing context
I agree with you that multiprocessing causes the failure of the test. I have found a workaround that is a bit unusual, but that you can adapt to your needs.
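To see the failure in isolation, here is a minimal sketch (independent of watchdog and of your classes) showing that calls recorded by a mock in a child process are invisible to the parent:

from multiprocessing import Process
from unittest.mock import MagicMock

m = MagicMock()

def child():
    m()  # records the call only on the child's copy of the mock

if __name__ == '__main__':
    p = Process(target=child)
    p.start()
    p.join()
    print(m.called)  # False in the parent, even though the child called m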
The workaround is based on sharing global variables between processes with multiprocessing.Value (see the documentation).
To do this I have defined 2 shared variables and 2 functions as below:
from multiprocessing import Value

shared_value = Value('i', 0)
stopped = Value('i', 0)

# this function is used to substitute method on_any_event() of class B
def on_any_event_spy(event):
    shared_value.value += 1

# this is the target for Process p1
def f(a: A):
    # substitution of on_any_event method with on_any_event_spy()
    a.handler.on_any_event = on_any_event_spy
    a.run()
Furthermore I have to modify the run() and stop() methods of class A.
New method stop() of class A:
def stop(self):
    stopped.value = 1
Method run() of class A (change only the condition of the while):
def run(self):
    # listen for changes in a folder using watchdog.observers.Observer
    self.observer.schedule(self.handler, self.input_directory, recursive=True)
    self.observer.start()
    try:
        #while not self.stopped:    # <------ comment this instruction
        while stopped.value == 0:   # <------ add this instruction
            time.sleep(self.scanning_frequency)
    except:
        self.observer.stop()
    self.observer.join()
The test method becomes:
class MyTestCase(unittest.TestCase):
    def test_file_watcher(self):
        # arrange
        b = B()
        a = A(b)
        shutil.copy(...)  # copy a file to the watched folder
        # I have changed your target value and added args
        p1 = Process(target=f, args=(a,))
        p1.start()
        time.sleep(a.scanning_frequency + 1)
        a.stop()  # stops watching the folder
        # the shared_value value MUST BE > 0
        self.assertGreater(shared_value.value, 0)
        p1.join()

if __name__ == '__main__':
    unittest.main()
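Note that if you are free to run a.run() in a thread instead of a separate process, your original MagicMock assertion works unchanged, because a thread shares memory with the test. A sketch, assuming A, B and the original self.stopped flag from your question:

import threading
import time
import shutil
from unittest.mock import MagicMock

def test_file_watcher_threaded(self):
    b = B()
    a = A(b)
    a.handler.on_any_event = MagicMock()
    t = threading.Thread(target=a.run)
    t.start()
    shutil.copy(...)  # copy a file to the watched folder
    time.sleep(a.scanning_frequency + 1)
    a.stop()
    t.join()
    assert a.handler.on_any_event.called  # same process, so the mock records the call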
How to mock B completely
The previous paragraph of this answer shows that the real problem with this test is the multiprocessing, but if you want to mock B completely, as you ask in your question, try changing your test_file_watcher() as follows:
def test_file_watcher(self):
    # arrange
    #b = B()             # <--------------------- comment this instruction
    b = Mock(wraps=B())  # <--------------------- add this instruction
    a = A(b)
    #a.handler.on_any_event = MagicMock()  # <--- comment this instruction
    shutil.copy(...)  # copy a file to the watched folder
    p1 = Process(target=a.run)
    p1.start()
    time.sleep(a.scanning_frequency + 1)  # folder scanning frequency + 1 second
    a.stop()  # stops watching the folder
    #assert a.handler.on_any_event.called  # <--- comment this instruction
    assert b.on_any_event.called           # <--- add this instruction
    p1.join()
I hope that with the instruction:
b = Mock(wraps=B())
you will wrap B completely, as you ask in your question, and that this can be useful for more traditional tests in the future.
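For reference, a small single-process sketch of what Mock(wraps=...) does: calls are recorded on the mock but delegated to the real object, so the wrapped behaviour still runs (Greeter here is just an illustrative stand-in for B):

from unittest.mock import Mock

class Greeter:
    def greet(self, name):
        return 'hello ' + name

g = Mock(wraps=Greeter())
print(g.greet('world'))  # 'hello world' - the real method still runs
print(g.greet.called)    # True - and the call was recorded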

Related

How to execute AST or code object in a separate process without exceeding max recursion depth

I am trying to write a metamorphic quine. Without the "spawn" context, the subprocesses seem to inherit the stack, and so I ultimately exceed the max recursion depth. With the "spawn" context, the subprocess doesn't seem to recurse. How would I go about executing the modified AST?
def main():
    module = sys.modules[__name__]
    source = inspect.getsource(module)
    tree = ast.parse(source)
    visitor = Visitor()  # TODO mutate
    tree = visitor.visit(tree)
    tree = ast.fix_missing_locations(tree)
    ctx = multiprocessing.get_context("spawn")
    process = ctx.Process(target=Y, args=(tree,))
    # Y() encapsulates these lines, since code objects can't be pickled
    #code = compile(tree, filename="<ast>", mode='exec', optimize=2)
    #process = ctx.Process(target=exec, args=(code, globals()))  # locals()
    process.daemon = True
    process.start()
    # TODO why do daemonized processes need to be joined in order to run?
    process.join()
    return 0

if __name__ == '__main__': exit(main())
It really is that easy: with daemon.DaemonContext(): foo()
Based on comments by @user2357112 supports Monica.
@trace
def spawn_child(f: Callable):
    with daemon.DaemonContext(stdin=sys.stdin, stdout=sys.stdout): return f()

I = TypeVar('I')

def ai(f: Callable[[int,], I]) -> Callable[[int,], I]:
    def g(*args, **kwargs) -> int:
        # assuming we have a higher-order function morph()
        # that has a concept of eta-equivalence
        # (e.g., a probabilistic notion),
        # then the recursive call should be "metamorphic"
        O = [morph(f), status, partial(spawn_child, f),]
        i = random.randrange(0, len(O))  # TODO something magickal
        return O[i]()
    return g

def main() -> int: return Y(ai)()

if __name__ == '__main__': exit(main())
The next problem is compiling the source for a nested function definition, since f() is not a reference to ai() but to a function defined within Y().
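On the earlier point that code objects can't be pickled: one picklable alternative, sketched here assuming Python 3.9+ for ast.unparse and that the mutated tree round-trips to valid source, is to ship the source text to the spawned child and compile it there:

import ast
import multiprocessing

def run_source(source: str) -> None:
    # compile and exec inside the child, so nothing unpicklable crosses the boundary
    code = compile(source, filename='<ast>', mode='exec')
    exec(code, {'__name__': '__mp_child__'})

def spawn_tree(tree: ast.AST) -> None:
    # strings pickle fine, unlike code objects
    source = ast.unparse(ast.fix_missing_locations(tree))
    ctx = multiprocessing.get_context('spawn')
    p = ctx.Process(target=run_source, args=(source,))
    p.start()
    p.join()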

How to allow a class's variables to be modified concurrently by multiple threads

I have a class (MyClass) which contains a queue (self.msg_queue) of actions that need to be run and I have multiple sources of input that can add tasks to the queue.
Right now I have three functions that I want to run concurrently:
MyClass.get_input_from_user()
Creates a window in tkinter that has the user fill out information and when the user presses submit it pushes that message onto the queue.
MyClass.get_input_from_server()
Checks the server for a message, reads the message, and then puts it onto the queue. This method uses functions from MyClass's parent class.
MyClass.execute_next_item_on_the_queue()
Pops a message off of the queue and then acts upon it. It is dependent on what the message is, but each message corresponds to some method in MyClass or its parent which gets run according to a big decision tree.
Process description:
After the class has joined the network, I have it spawn three threads (one for each of the above functions). Each threaded function adds items to the queue with the syntax "self.msg_queue.put(message)" and removes items from the queue with "self.msg_queue.get_nowait()".
Problem description:
The issue I am having is that it seems that each thread is modifying its own queue object (they are not sharing msg_queue, the queue of the class of which the functions are all members).
I am not familiar enough with multiprocessing to know which error messages are the important ones; however, it states that it cannot pickle a weakref object (it gives no indication of which object is the weakref), and that within the queue.put() call the line self._sem.acquire(block, timeout) yields a "[WinError 5] Access is denied" error. Would it be safe to assume that this failure is the queue's reference not copying over properly?
[I am using Python 3.7.2 and the Multiprocessing package's Process and Queue]
[I have seen multiple Q/As about having threads shuttle information between classes--create a master harness that generates a queue and then pass that queue as an argument to each thread. If the functions didn't have to use other functions from MyClass I could see adapting this strategy by having those functions take in a queue and use a local variable rather than class variables.]
[I am fairly confident that this error is not the result of passing my queue to the tkinter object as my unit tests on how my GUI modifies its caller's queue work fine]
Below is a minimal reproducible example for the queue's error:
from multiprocessing import Queue
from multiprocessing import Process
import queue
import time
class MyTest:
def __init__(self):
self.my_q = Queue()
self.counter = 0
def input_function_A(self):
while True:
self.my_q.put(self.counter)
self.counter = self.counter + 1
time.sleep(0.2)
def input_function_B(self):
while True:
self.counter = 0
self.my_q.put(self.counter)
time.sleep(1)
def output_function(self):
while True:
try:
var = self.my_q.get_nowait()
except queue.Empty:
var = -1
except:
break
print(var)
time.sleep(1)
def run(self):
process_A = Process(target=self.input_function_A)
process_B = Process(target=self.input_function_B)
process_C = Process(target=self.output_function)
process_A.start()
process_B.start()
process_C.start()
# without this it generates the WinError:
# with this it still behaves as if the two input functions do not modify the queue
process_C.join()
if __name__ == '__main__':
test = MyTest()
test.run()
Indeed - these are not "threads" - these are "processes". If you were using multithreading rather than multiprocessing, the self.my_q instance would be the same object, sitting at the same memory address,
whereas multiprocessing forks the process (or, on Windows, re-creates the object by pickling), and any data in the original process (the one executing the "run" call) is duplicated when it is used - so each subprocess sees its own "Queue" instance, unrelated to the others.
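A tiny sketch of that duplication, independent of the start method (fork copies the object, spawn re-creates it by pickling; either way the parent never sees the child's mutation):

from multiprocessing import Process

class Holder:
    def __init__(self):
        self.items = []

def worker(h):
    h.items.append(1)               # mutates the child's copy only
    print("child sees:", h.items)   # child sees: [1]

if __name__ == '__main__':
    h = Holder()
    p = Process(target=worker, args=(h,))
    p.start()
    p.join()
    print("parent sees:", h.items)  # parent sees: []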
The correct way to have various processes share a multiprocessing.Queue object is to pass it as a parameter to the target methods. The simplest way to reorganize your code so that it works is:
from multiprocessing import Queue
from multiprocessing import Process
import queue
import time

class MyTest:
    def __init__(self):
        self.my_q = Queue()
        self.counter = 0

    def input_function_A(self, q):
        while True:
            q.put(self.counter)
            self.counter = self.counter + 1
            time.sleep(0.2)

    def input_function_B(self, q):
        while True:
            self.counter = 0
            q.put(self.counter)
            time.sleep(1)

    def output_function(self, q):
        while True:
            try:
                var = q.get_nowait()
            except queue.Empty:
                var = -1
            except:
                break
            print(var)
            time.sleep(1)

    def run(self):
        # pass the shared queue explicitly; the parameter is named q so that
        # it does not shadow the queue module needed for queue.Empty
        process_A = Process(target=self.input_function_A, args=(self.my_q,))
        process_B = Process(target=self.input_function_B, args=(self.my_q,))
        process_C = Process(target=self.output_function, args=(self.my_q,))
        process_A.start()
        process_B.start()
        process_C.start()
        process_C.join()

if __name__ == '__main__':
    test = MyTest()
    test.run()
As you can see, since your class is not actually sharing any data through the instance's attributes, this "class" design does not make much sense for your application - it only serves to group the different workers in the same code block.
It would be possible to have a magic-multiprocess-class that would have some internal method to actually start the worker-methods and share the Queue instance - so if you have a lot of those in a project, there would be a lot less boilerplate.
Something along:
from multiprocessing import Queue
from multiprocessing import Process
import queue
import time

class MPWorkerBase:
    def __init__(self, *args, **kw):
        self.queue = None
        self.is_parent_process = False
        self.is_child_process = False
        self.processes = []
        # ensure this can be used as a collaborative mixin
        super().__init__(*args, **kw)

    def run(self):
        if self.is_parent_process or self.is_child_process:
            # workers already initialized
            return
        self.queue = Queue()
        cls = self.__class__
        for name in dir(cls):
            method = getattr(cls, name)
            if callable(method) and getattr(method, "_MP_worker", False):
                process = Process(target=self._start_worker, args=(self.queue, name))
                self.processes.append(process)
                process.start()
        # Setting this attribute here ensures the child processes keep the
        # initial (False) value for it.
        self.is_parent_process = True

    def _start_worker(self, queue, method_name):
        # this method is called in a new spawned process - attribute
        # changes here no longer reflect attributes on the
        # object in the initial process
        # overwrite queue in this process with the queue object sent over the wire:
        self.queue = queue
        self.is_child_process = True
        # call the worker method
        getattr(self, method_name)()

    def __del__(self):
        for process in self.processes:
            process.join()


def worker(func):
    """decorator to mark a method as a worker that should
    run in its own subprocess
    """
    func._MP_worker = True
    return func


class MyTest(MPWorkerBase):
    def __init__(self):
        super().__init__()
        self.counter = 0

    @worker
    def input_function_A(self):
        while True:
            self.queue.put(self.counter)
            self.counter = self.counter + 1
            time.sleep(0.2)

    @worker
    def input_function_B(self):
        while True:
            self.counter = 0
            self.queue.put(self.counter)
            time.sleep(1)

    @worker
    def output_function(self):
        while True:
            try:
                var = self.queue.get_nowait()
            except queue.Empty:
                var = -1
            except:
                break
            print(var)
            time.sleep(1)

if __name__ == '__main__':
    test = MyTest()
    test.run()

python thread pool copy parameters

I'm learning about multithreading and I'm trying to implement a few things to understand it.
After reading several (and very technical) topics, I cannot find a solution or a way to understand my issue.
Basically, I have the following structure:
import datetime
from multiprocessing import Pool

class MyObject():
    def __init__(self):
        self.lastupdate = datetime.datetime.now()

    def DoThings(self):
        ...

def MyThreadFunction(OneOfMyObject):
    OneOfMyObject.DoThings()
    OneOfMyObject.lastupdate = datetime.datetime.now()

def main():
    MyObject1 = MyObject()
    MyObject2 = MyObject()
    MyObjects = [MyObject1, MyObject2]
    pool = Pool(2)
    while True:
        pool.map(MyThreadFunction, MyObjects)

if __name__ == '__main__':
    main()
I think the function .map makes a copy of my objects, because it does not update the time. Is that right? If yes, how could I pass in a global version of my objects? If not, would you have any idea why the time stays fixed in my objects?
When I check the new time with print(MyObject.lastupdate), the time is right, but not in the next loop.
Thank you very much for any of your ideas
Yes, a multiprocessing Pool will serialize (actually, pickle) your objects and then reconstruct them in the worker. However, it also sends them back. To recover them, see the commented additions to the code below:
import datetime
from multiprocessing import Pool

class MyObject():
    def __init__(self):
        self.lastupdate = datetime.datetime.now()

    def DoThings(self):
        ...

def MyThreadFunction(OneOfMyObject):
    OneOfMyObject.DoThings()
    OneOfMyObject.lastupdate = datetime.datetime.now()
    # NOW, RETURN THE OBJECT
    return OneOfMyObject

def main():
    MyObject1 = MyObject()
    MyObject2 = MyObject()
    MyObjects = [MyObject1, MyObject2]
    # a neater way of doing it than a while loop for various reasons;
    # check out context managers if interested
    with Pool(2) as pool:
        # Now we recover a list of the updated objects:
        processed_object_list = pool.map(MyThreadFunction, MyObjects)
    # Now inspect
    for my_object in processed_object_list:
        print(my_object.lastupdate)

if __name__ == '__main__':
    main()
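To confirm that what comes back is a reconstructed copy rather than the original object, one can compare identities (a quick sketch; Box and bump are illustrative names):

from multiprocessing import Pool

class Box:
    def __init__(self):
        self.value = 0

def bump(box):
    box.value += 1
    return box

if __name__ == '__main__':
    original = Box()
    with Pool(1) as pool:
        result = pool.map(bump, [original])[0]
    print(result.value)        # 1 - the update came back in the copy
    print(original.value)      # 0 - the original was never touched
    print(result is original)  # False - a different object entirely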

python - How to define function that I can use efficiently across modules and directories

I want to implement a timer to measure how long a block of code takes to run. I then want to do this across an entire application containing multiple modules (40+) across multiple directories (4+).
My timer is created with two functions that are within a class with a structure like this:
class SubClass(Class1):
    def getStartTime(self):
        start = time.time()
        return start

    def logTiming(self, classstring, start):
        fin = time.time() - start
        logging.getLogger('perf_log_handler').info((classstring + ' sec').format(round(fin, 3)))
The first function gets the start time, and the second function calculates the time for the block to run and then logs it to a logger.
This code is in a module that we'll call module1.py.
In practice, generically, it will be implemented as such:
class SubSubClass(SubClass):
    def Some_Process(self):
        stim = super().getStartTime()
        # code..............................
        # ...
        # ...
        # ...
        # ...
        super().logTiming("The Process took: {}", stim)
        return Result_Of_Process
This code resides in a module called module2.py and already works and successfully logs. My problem is that when structured like this, I can seemingly only use the timer inside code that is under the umbrella of SubClass, where it is defined (my application fails to render and I get a "can't find page" error in my browser). But I want to use this code everywhere in all the application modules, globally. Whether the module is within another directory, whether some blocks of code are within other classes and subclasses inside other modules, everywhere.
What is the easiest, most efficient way to create this timing instrument so that I can use it anywhere in my application? I understand I may have to define it completely differently. I am very new to all of this, so any help is appreciated.
OPTION 1) You should define another module, for example "mytimer.py", fully dedicated to the timer:
import time

class MyTimer():
    def __init__(self):
        self.start = time.time()

    def log(self):
        now = time.time()
        return now - self.start
And then, from any line of your code, for example, in module2.py:
from mytimer import MyTimer

class SomeClass():
    def Some_Function(self):
        t = MyTimer()
        # ....
        print(t.log())  # elapsed seconds
        return ...
OPTION 2) You could also use a simple function instead of a class:
import time

def mytimer(start=None, tag=""):
    if start is None:
        start = time.time()
    now = time.time()
    delay = float(now - start)
    print("%(tag)s %(delay).2f seconds." % {'tag': tag, 'delay': delay})
    return now
And then, in your code:
from mytimer import mytimer

class SomeClass():
    def Some_Function(self):
        t = mytimer(tag='BREAK0')
        # ....
        t = mytimer(start=t, tag='BREAK1')
        # ....
        t = mytimer(start=t, tag='BREAK2')
        # ....
        t = mytimer(start=t, tag='BREAK3')
        return ...
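A third variant along the same lines (sketched here; timed_block is an illustrative name, not part of the options above) is a context manager, which ties the measurement to a with-block so the closing call cannot be forgotten:

import time
from contextlib import contextmanager

@contextmanager
def timed_block(tag=""):
    start = time.time()
    try:
        yield
    finally:
        # runs even if the block raises
        print("%(tag)s %(delay).2f seconds." % {'tag': tag, 'delay': time.time() - start})

# usage, from any module:
# with timed_block(tag='BREAK0'):
#     ...  # the code being measured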
I am not quite sure what you are looking for, but once upon a time I used a decorator for a similar type of problem.
The snippet below is the closest I can remember to what I implemented at that time. Hopefully it is useful to you.
Brief explanation
timed is a 'decorator' that wraps methods of the python object and times the method.
The class contains a log that is updated by the wrapper as the @timed methods are called.
Note that if you want to make the @property act as a "class property" you can draw inspiration from this post.
from time import sleep, time

# -----------------
# Define Decorators
# -----------------
def timed(wrapped):
    def wrapper(self, *arg, **kwargs):
        start = time()
        res = wrapped(self, *arg, **kwargs)
        stop = time()
        self.log = {'method': wrapped.__name__, 'called': start, 'elapsed': stop - start}
        return res
    return wrapper

# -----------------
# Define Classes
# -----------------
class Test(object):
    __log = []

    @property
    def log(self):
        return self.__log

    @log.setter
    def log(self, kwargs):
        self.__log.append(kwargs)

    @timed
    def test(self):
        print("Running timed method")
        sleep(2)

    @timed
    def test2(self, a, b=2):
        print("Running another timed method")
        sleep(2)
        return a + b

# ::::::::::::::::::
if __name__ == '__main__':
    t = Test()
    res = t.test()
    res = t.test2(1)
    print(t.log)

using python multiprocessing package inside a qgis plugin code

I spent quite a bit of time looking into how to use the multiprocessing package, but couldn't find anything on how to use it inside a plugin in QGIS. I am developing a plugin that does some optimization for several elements. I would like to parallelize it.
I found a useful link on multi-threading inside a python plugin (http://snorf.net/blog/2013/12/07/multithreading-in-qgis-python-plugins/), but nothing on using the multiprocessing module, which might be easier?
I have been trying with a very basic example. I am only showing the run function from the plugin here:
def run(self):
    """Run method that performs all the real work"""
    # show the dialog
    self.dlg.show()
    # Run the dialog event loop
    result = self.dlg.exec_()
    # See if OK was pressed and run code
    if result:
        # Get the number of cores
        nProcs = mp.cpu_count()
        # Start a Pool
        p = mp.Pool(nProcs)
        # Define the function
        def cube(x):
            return x**3
        # Run in parallel
        results = p.map(cube, range(1, 7))
When I run this code from the plugin in QGIS, it opens several QGIS windows, which then return errors (can't load layers, etc.). What am I missing? Do I need to start a worker on another thread first and then use multiprocessing there? Or should I use another function from multiprocessing?
Please let me know if the question needs edits. I am working under Windows 7, using QGIS 2.10.
Thanks,
UPDATE
I created a worker class to implement the function and sent it to a new thread, but I get the same problem when I use multiprocessing in that thread.
The class I created is as follows:
class Worker(QObject):
    '''Example worker'''
    def __init__(self, result_queue, f, attr=[], repet=None, nbCores=None):
        QObject.__init__(self)
        if not hasattr(f, '__call__'):
            # Check if not a function
            raise TypeError('Worker expected a function as second argument')
        if not isinstance(attr, list) and not repet == None:
            # Check if not a list if there is a repet command
            raise TypeError('Input problem:\nThe arguments for the function should be in a list if repet is provided')
        if not all(isinstance(elem, list) for elem in attr) and repet == None and len(inspect.getargspec(f).args) > 1:
            # Check if not a list of lists if there isn't a repet command
            raise TypeError('Input problem:\nThe arguments for the function should be a list of lists if repet is not provided')
        if not repet == None and (not isinstance(repet, int) or repet == 0):
            # Check that provided an integer greater than 0
            raise TypeError('If provided, repet should be None or a strictly positive integer')
        self.result_queue = result_queue
        self.f = f
        self.attr = attr
        self.repet = repet
        self.nbCores = nbCores
        if self.nbCores == None:
            self.nbCores = mp.cpu_count() - 1

    def fStar(self, arg):
        """Convert the function to taking a list as arguments"""
        return self.f(*arg)

    def run(self):
        ret = None
        try:
            if self.repet == 1:
                # estimates the function based on provided arguments
                ret = self.f(*self.attr)  # The star unpacks the list into attributes
            else:
                pool = mp.Pool(processes=self.nbCores)
                if self.repet > 1:
                    ret = pool.map(self.fStar, itools.repeat(self.attr, self.repet))
                elif self.repet == None:
                    ret = pool.map(self.fStar, self.attr)
                pool.close()
                pool.join()
        except Exception, e:
            # I can't pass an exception, it makes qgis bug
            pass
        self.result_queue.put(ret)  # Pass the result to the queue

    finished = pyqtSignal(object)
    error = pyqtSignal(Exception, basestring)
I start the worker and send it to a new thread using the following function:
def startWorker(f, attr, repet=None, nbCores=None):
    # Create a result queue
    result_queue = queue.Queue()
    # create a new worker instance
    worker = Worker(result_queue, f, attr, repet, nbCores)
    # start the worker in a new thread
    thread = QThread()
    worker.moveToThread(thread)
    thread.started.connect(worker.run)
    thread.start()
    # Clean up when the thread is finished
    worker.deleteLater()
    thread.quit()
    thread.wait()
    thread.deleteLater()
    # Export the result to the queue
    res = []
    while not result_queue.empty():
        r = result_queue.get()
        if r is None:
            continue
        res.append(r)
    return res
As in my initial question, I just replaced results = p.map(cube, range(1,7)) with a call to the startWorker function.
Please let me know if you have any idea how to make this work. I implemented the work in multiple threads, but it would be much faster to use several cores...
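One detail worth checking here, sketched under the assumption that a standalone python.exe ships with the QGIS install: on Windows, multiprocessing launches each child by re-running sys.executable, which inside QGIS is the QGIS binary itself - which would explain the several QGIS windows. multiprocessing.set_executable lets you point it at a real Python interpreter instead:

import multiprocessing as mp
import os
import sys

if os.name == 'nt':
    # illustrative path - adjust to the interpreter bundled with your QGIS install
    mp.set_executable(os.path.join(sys.exec_prefix, 'python.exe'))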
