Let two functions run periodically with different 'sampling' times - python

I already managed to execute one function periodically with a specific sampling time T using the Python scheduler from the sched package:
import sched
import time

def cycle(sche, T, fun, arg):
    sche.enter(T, 1, cycle, (sche, T, fun, arg))
    fun(arg)

def fun(arg):
    print(str(time.time()))
    print(arg)

def main():
    scheduler = sched.scheduler(time.time, time.sleep)
    T = 1
    arg = "some argument"
    cycle(scheduler, T, fun, arg)
    scheduler.run()

main()
What I would like to do is add another function fun2() that will also be executed periodically, with another sampling time T2.
What would be a proper way to do that?
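For reference, here is a minimal sketch (my own, not from the thread) showing that a single scheduler can interleave both periods: each function re-arms its own event, and one run() loop services all pending events. The caveat is that a slow callback delays everything else on that scheduler.

import sched
import time

def periodic(scheduler, interval, action, arg):
    # Re-arm first, so the period is not stretched by the action's runtime
    scheduler.enter(interval, 1, periodic, (scheduler, interval, action, arg))
    action(arg)

def fun(arg):
    print("fun:", time.time(), arg)

def fun2(arg):
    print("fun2:", time.time(), arg)

if __name__ == "__main__":
    s = sched.scheduler(time.time, time.sleep)
    periodic(s, 1, fun, "some argument")      # T = 1 s
    periodic(s, 2.5, fun2, "other argument")  # T2 = 2.5 s
    s.run()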

So for me the following solution worked:
As I will have two CPU-bound tasks, I set up a multiprocessing environment with two processes. Each process starts its own scheduler that runs 'forever' with its own 'sampling' time.
What does anybody with more experience in python than me (I've just started :-D) think about this approach? Will it cause any problems in your opinion?
import time
import multiprocessing
import sched

global schedule1
global schedule2

def fun1(arg):
    print("I'm the function that is executed every T1")
    time.sleep(0.05)  # do something for t < T1

def fun2(arg):
    print("I'm the function that is executed every T2")
    time.sleep(0.8)  # do something for t < T2

def cycle1(scheduler1, T1, fun, arg):
    global schedule1
    try:
        schedule1.append(scheduler1.enter(T1, 1, cycle1, (scheduler1, T1, fun, arg)))
        fun1(arg)
        scheduler1.run()
    except KeyboardInterrupt:
        for event in schedule1:
            try:
                scheduler1.cancel(event)
            except ValueError:
                continue
        return

def cycle2(scheduler2, T2, fun, arg):
    global schedule2
    try:
        schedule2.append(scheduler2.enter(T2, 1, cycle2, (scheduler2, T2, fun, arg)))
        fun2(arg)
        scheduler2.run()
    except KeyboardInterrupt:
        for event in schedule2:
            try:
                scheduler2.cancel(event)
            except ValueError:
                continue
        return

def main():
    global schedule2
    global schedule1
    schedule2 = []
    schedule1 = []
    scheduler1 = sched.scheduler(time.time, time.sleep)
    scheduler2 = sched.scheduler(time.time, time.sleep)
    T1 = 0.1
    T2 = 1
    list_of_arguments_for_fun1 = []
    list_of_arguments_for_fun2 = []
    processes = []
    # set up first process
    process1 = multiprocessing.Process(target=cycle1, args=(scheduler1, T1, fun1, list_of_arguments_for_fun1))
    processes.append(process1)
    # set up second process (note: pass fun2 here, not the argument list twice)
    process2 = multiprocessing.Process(target=cycle2, args=(scheduler2, T2, fun2, list_of_arguments_for_fun2))
    processes.append(process2)
    process1.start()
    process2.start()
    for process in processes:
        process.join()
    # anything below here in the main() won't be executed

if __name__ == "__main__":
    try:
        start = time.perf_counter()
        main()
    except KeyboardInterrupt:
        print('\nCancelled by User. Bye!')
    finish = time.perf_counter()
    print(f'Finished in {round(finish - start, 2)} second(s)')

Related

How do I access data from a python thread

I have a very simple threading example using Python 3.4.2. In this example I am creating five threads that just return the character string "Result" and append it to an array titled threads. In another for loop, iterated five times, the threads are joined to the variable x. I am trying to print the result x, which should yield a list that looks like ['Result','Result','Result','Result','Result'], but instead the print command only yields the title of the thread and the fact that it is closed. I'm obviously misunderstanding how to use threads in Python. If someone could provide an example of how to adequately complete this test case I would be very grateful.
import threading

def Thread_Test():
    return ("Result")

number = 5
threads = []
for i in range(number):
    Result = threading.Thread(target=Thread_Test)
    threads.append(Result)
    Result.start()
for x in threads:
    x.join()
    print(x)
There is a difference between creating a thread and trying to get values out of a thread. Generally speaking, you should never try to use return in a thread to provide a value back to its caller. That is not how threads work. When you create a thread object, you have to figure out a different way to get any values calculated in the thread to some other part of your program. The following is a simple example showing how values might be returned using a list.
#! /usr/bin/env python3
import threading

def main():
    # Define a few variables including storage for threads and values.
    threads_to_create = 5
    threads = []
    results = []
    # Create, start, and store all of the thread objects.
    for number in range(threads_to_create):
        # Bind number as a default argument so each thread appends its own
        # value; a bare lambda would late-bind and could append the final
        # value of number several times.
        thread = threading.Thread(target=lambda n=number: results.append(n))
        thread.start()
        threads.append(thread)
    # Ensure all threads are done and show the results.
    for thread in threads:
        thread.join()
    print(results)

if __name__ == '__main__':
    main()
If you absolutely insist that you must have the ability to return values from the target of a thread, it is possible to override some methods in threading.Thread using a child class to get the desired behavior. The following shows more advanced usage and demonstrates how multiple methods require a change in case someone desires to inherit from and override the run method of the new class. This code is provided for completeness and probably should not be used.
#! /usr/bin/env python3
import sys as _sys
import threading

def main():
    # Define a few variables including storage for threads.
    threads_to_create = 5
    threads = []
    # Create, start, and store all of the thread objects.
    for number in range(threads_to_create):
        # Bind number as a default argument to avoid the lambda late-binding trap.
        thread = ThreadWithReturn(target=lambda n=number: n)
        thread.start()
        threads.append(thread)
    # Ensure all threads are done and show the results.
    print([thread.returned for thread in threads])

class ThreadWithReturn(threading.Thread):
    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, *, daemon=None):
        super().__init__(group, target, name, args, kwargs, daemon=daemon)
        self.__value = None

    def run(self):
        try:
            if self._target:
                return self._target(*self._args, **self._kwargs)
        finally:
            del self._target, self._args, self._kwargs

    def _bootstrap_inner(self):
        try:
            self._set_ident()
            self._set_tstate_lock()
            self._started.set()
            with threading._active_limbo_lock:
                threading._active[self._ident] = self
                del threading._limbo[self]
            if threading._trace_hook:
                _sys.settrace(threading._trace_hook)
            if threading._profile_hook:
                _sys.setprofile(threading._profile_hook)
            try:
                self.__value = True, self.run()
            except SystemExit:
                pass
            except:
                exc_type, exc_value, exc_tb = self._exc_info()
                self.__value = False, exc_value
                if _sys and _sys.stderr is not None:
                    print("Exception in thread %s:\n%s" %
                          (self.name, threading._format_exc()), file=_sys.stderr)
                elif self._stderr is not None:
                    try:
                        print((
                            "Exception in thread " + self.name +
                            " (most likely raised during interpreter shutdown):"), file=self._stderr)
                        print((
                            "Traceback (most recent call last):"), file=self._stderr)
                        while exc_tb:
                            print((
                                '  File "%s", line %s, in %s' %
                                (exc_tb.tb_frame.f_code.co_filename,
                                 exc_tb.tb_lineno,
                                 exc_tb.tb_frame.f_code.co_name)), file=self._stderr)
                            exc_tb = exc_tb.tb_next
                        print(("%s: %s" % (exc_type, exc_value)), file=self._stderr)
                    finally:
                        del exc_type, exc_value, exc_tb
            finally:
                pass
        finally:
            with threading._active_limbo_lock:
                try:
                    del threading._active[threading.get_ident()]
                except:
                    pass

    @property
    def returned(self):
        if self.__value is None:
            self.join()
        if self.__value is not None:
            valid, value = self.__value
            if valid:
                return value
            raise value

if __name__ == '__main__':
    main()
Please find below a simple example using a queue and threads (Python 2):
import threading
import Queue
import timeit

q = Queue.Queue()
number = 5

t1 = timeit.default_timer()
# Step 1: for example, we are running multiple functions normally
result = []
def fun(x):
    result.append(x)
    return x

for i in range(number):
    fun(i)
print result, "# normal result"
print(timeit.default_timer() - t1)

t2 = timeit.default_timer()
# Step 2: by using threads and queue
def fun_thrd(x, q):
    q.put(x)
    return

for i in range(number):
    t1 = threading.Thread(target=fun_thrd, args=(i, q))
    t1.start()
    t1.join()

thrd_result = []
while True:
    if not q.empty():
        thrd_result.append(q.get())
    else:
        break
print thrd_result, "# result with threads involved"
print(timeit.default_timer() - t2)

t3 = timeit.default_timer()
# Step 3: if you want threads to run without depending on the previous thread
threads = []
def fun_thrd_independent(x, q):
    q.put(x)
    return

def thread_indep(number):
    for i in range(number):
        t = threading.Thread(target=fun_thrd_independent, args=(i, q))
        t.start()
        threads.append(t)

thread_indep(5)
for j in threads:
    j.join()

thread_indep_result = []
while True:
    if not q.empty():
        thread_indep_result.append(q.get())
    else:
        break
print thread_indep_result, "# result when threads are independent of each other"
print(timeit.default_timer() - t3)
output:
[0, 1, 2, 3, 4] # normal result
3.50475311279e-05
[0, 1, 2, 3, 4] # result with threads involved
0.000977039337158
[0, 1, 2, 3, 4] # result when threads are independent of each other
0.000933170318604
The timings will differ hugely according to the scale of the data.
Hope this helps, thanks!

How to terminate a thread in python after a certain amount of time?

I have multiple threads that run a while loop. I would like to terminate these threads after a given amount of time. I am aware of other questions similar to this but I don't see how I can transfer those answers to my code.
def function1(arg1, arg2, arg3, duration):
    t_end = time.time() + duration
    while time.time() < t_end:
        pass  # do some stuff

for i in range(100):
    t = Thread(target=function1, args=(arg1, arg2, arg3, 10))
    t.start()
This opens 100 threads but they never close. How can I close these threads after the specified time, in this example 10 seconds? My function opens a socket.
You could pass a callback to each thread. And create a thread list.
threadlist = {}

def cb(threadid, currtime):
    t = threadlist[threadid]
    d = currtime - t["starttime"]
    if d > 10:
        return True
    else:
        return False

def function1(arg1, arg2, arg3, duration, cb, threadid):
    t_end = time.time() + duration
    while time.time() < t_end:
        # do some stuff
        if cb(threadid, time.time()):
            break

for i in range(100):
    t = Thread(target=function1, args=(arg1, arg2, arg3, 10, cb, i))
    threadlist[i] = {"starttime": time.time(), "thread": t}
    t.start()
And to check:
time.sleep(15)
for item in threadlist.values():
    print(item["thread"].is_alive())
Use a mixture of terminating a thread (info found here: Is there any way to kill a Thread in Python?)
and threading timer objects: https://docs.python.org/2/library/threading.html#timer-objects
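As a minimal sketch of that combination (my own illustration, not from the linked posts): a threading.Timer can fire a stop() method that sets an Event the worker checks on each pass of its loop.

import threading
import time

class Worker(threading.Thread):
    def __init__(self):
        super().__init__()
        self._stop_event = threading.Event()

    def stop(self):
        self._stop_event.set()

    def run(self):
        while not self._stop_event.is_set():
            time.sleep(0.1)  # the real work goes here

if __name__ == "__main__":
    w = Worker()
    w.start()
    threading.Timer(10, w.stop).start()  # request shutdown after 10 seconds
    w.join()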
The code below works for me. The TypeError it originally kept throwing turns out to come from naming the attribute _stop: threading.Thread in Python 3 already defines an internal _stop() method, and shadowing it with an Event breaks the interpreter's shutdown code. Renaming the attribute (as done below) prevents it:
threadingtest.py
#!/usr/bin/env python3
import time
import threading

class StoppableThread(threading.Thread):
    """Thread class with a stop() method. The thread itself has to check
    regularly for the stopped() condition."""

    def __init__(self):
        super(StoppableThread, self).__init__()
        # Named _stop_event rather than _stop, which would shadow an
        # internal method of threading.Thread and raise a TypeError.
        self._stop_event = threading.Event()

    def stop(self):
        self._stop_event.set()
        self.join()

    def stopped(self):
        return self._stop_event.is_set()

class MyStoppableThread(StoppableThread):
    def __init__(self, *args):
        super(MyStoppableThread, self).__init__()
        self.args = args  # Use these in the thread

    def run(self):
        print("Started my thread with arguments {}".format(self.args))
        while not self.stopped():
            time.sleep(1)
            # THIS IS WHERE YOU DO THINGS

if __name__ == "__main__":
    threads = []
    for i in range(100):
        t = MyStoppableThread(i, 'a', 'b', 'c')
        t.start()
        threads.append(t)
    print("\n:: all threads created\n")
    time.sleep(5)
    print("\n:: killing all threads\n")
    for t in threads:
        t.stop()

Python threading: can I sleep on two threading.Event()s simultaneously?

If I have two threading.Event() objects, and wish to sleep until either one of them is set, is there an efficient way to do that in python? Clearly I could do something with polling/timeouts, but I would like to really have the thread sleep until one is set, akin to how select is used for file descriptors.
So in the following implementation, what would an efficient non-polling implementation of wait_for_either look like?
a = threading.Event()
b = threading.Event()
wait_for_either(a, b)
Here is a non-polling non-excessive thread solution: modify the existing Events to fire a callback whenever they change, and handle setting a new event in that callback:
import threading

def or_set(self):
    self._set()
    self.changed()

def or_clear(self):
    self._clear()
    self.changed()

def orify(e, changed_callback):
    e._set = e.set
    e._clear = e.clear
    e.changed = changed_callback
    e.set = lambda: or_set(e)
    e.clear = lambda: or_clear(e)

def OrEvent(*events):
    or_event = threading.Event()
    def changed():
        bools = [e.is_set() for e in events]
        if any(bools):
            or_event.set()
        else:
            or_event.clear()
    for e in events:
        orify(e, changed)
    changed()
    return or_event
Sample usage:
def wait_on(name, e):
    print "Waiting on %s..." % (name,)
    e.wait()
    print "%s fired!" % (name,)

def test():
    import time
    e1 = threading.Event()
    e2 = threading.Event()
    or_e = OrEvent(e1, e2)
    threading.Thread(target=wait_on, args=('e1', e1)).start()
    time.sleep(0.05)
    threading.Thread(target=wait_on, args=('e2', e2)).start()
    time.sleep(0.05)
    threading.Thread(target=wait_on, args=('or_e', or_e)).start()
    time.sleep(0.05)
    print "Firing e1 in 2 seconds..."
    time.sleep(2)
    e1.set()
    time.sleep(0.05)
    print "Firing e2 in 2 seconds..."
    time.sleep(2)
    e2.set()
    time.sleep(0.05)
The result of which was:
Waiting on e1...
Waiting on e2...
Waiting on or_e...
Firing e1 in 2 seconds...
e1 fired!or_e fired!
Firing e2 in 2 seconds...
e2 fired!
This should be thread-safe. Any comments are welcome.
EDIT: Oh and here is your wait_for_either function, though the way I wrote the code, it's best to make and pass around an or_event. Note that the or_event shouldn't be set or cleared manually.
def wait_for_either(e1, e2):
    OrEvent(e1, e2).wait()
I think the standard library provides a pretty canonical solution to this problem that I don't see brought up in this question: condition variables. You have your main thread wait on a condition variable, and poll the set of events each time it is notified. It is only notified when one of the events is updated, so there is no wasteful polling. Here is a Python 3 example:
from threading import Thread, Event, Condition
from time import sleep
from random import random

event1 = Event()
event2 = Event()
cond = Condition()

def thread_func(event, i):
    delay = random()
    print("Thread {} sleeping for {}s".format(i, delay))
    sleep(delay)
    event.set()
    with cond:
        cond.notify()
    print("Thread {} done".format(i))

with cond:
    Thread(target=thread_func, args=(event1, 1)).start()
    Thread(target=thread_func, args=(event2, 2)).start()
    print("Threads started")
    while not (event1.is_set() or event2.is_set()):
        print("Entering cond.wait")
        cond.wait()
    print("Exited cond.wait ({}, {})".format(event1.is_set(), event2.is_set()))
print("Main thread done")
Example output:
Thread 1 sleeping for 0.31569427100177794s
Thread 2 sleeping for 0.486548134317051s
Threads started
Entering cond.wait
Thread 1 done
Exited cond.wait (True, False)
Main thread done
Thread 2 done
Note that with no extra threads or unnecessary polling, you can wait for an arbitrary predicate to become true (e.g. for any particular subset of the events to be set). There's also a wait_for wrapper for the while not pred: cond.wait() pattern, which can make your code a bit easier to read.
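For instance, the waiting loop in the example above could be collapsed to the following (a sketch reusing the cond/event1/event2 names from that example):

with cond:
    cond.wait_for(lambda: event1.is_set() or event2.is_set())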
One solution (with polling) would be to do sequential waits on each Event in a loop
def wait_for_either(a, b):
    while True:
        if a.wait(tunable_timeout):
            break
        if b.wait(tunable_timeout):
            break
I think that if you tune the timeout well enough the results would be OK.
The best non-polling approach I can think of is to wait for each one in a different thread and set a shared Event, which the main thread then waits on.
def repeat_trigger(waiter, trigger):
    waiter.wait()
    trigger.set()

def wait_for_either(a, b):
    trigger = threading.Event()
    ta = threading.Thread(target=repeat_trigger, args=(a, trigger))
    tb = threading.Thread(target=repeat_trigger, args=(b, trigger))
    ta.start()
    tb.start()
    # Now do the union waiting
    trigger.wait()
Pretty interesting, so I wrote an OOP version of the previous solution:
class EventUnion(object):
    """Register Event objects and wait for release when any of them is set"""
    def __init__(self, ev_list=None):
        self._trigger = Event()
        if ev_list:
            # Make a list of threads, one for each Event
            self._t_list = [
                Thread(target=self._triggerer, args=(ev, ))
                for ev in ev_list
            ]
        else:
            self._t_list = []

    def register(self, ev):
        """Register a new Event"""
        self._t_list.append(Thread(target=self._triggerer, args=(ev, )))

    def wait(self, timeout=None):
        """Start waiting until any one of the registered Events is set"""
        # Start all the threads (a plain loop; map() would be lazy on Python 3)
        for t in self._t_list:
            t.start()
        # Now do the union waiting
        return self._trigger.wait(timeout)

    def _triggerer(self, ev):
        ev.wait()
        self._trigger.set()
This is an old question, but I hope this helps someone coming from Google.
The accepted answer is fairly old and will cause an infinite loop for twice-"orified" events.
Here is an implementation using concurrent.futures
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor

def wait_for_either(events, timeout=None, t_pool=None):
    '''blocks until one of the events gets set

    PARAMETERS
    events (list): list of threading.Event objects
    timeout (float): timeout for events (used for polling)
    t_pool (concurrent.futures.ThreadPoolExecutor): optional
    '''
    if any(event.is_set() for event in events):
        # sanity check
        pass
    else:
        t_pool = t_pool or ThreadPoolExecutor(max_workers=len(events))
        tasks = []
        for event in events:
            tasks.append(t_pool.submit(event.wait))
        concurrent.futures.wait(tasks, timeout=timeout, return_when='FIRST_COMPLETED')
        # cleanup
        for task in tasks:
            try:
                task.result(timeout=0)
            except concurrent.futures.TimeoutError:
                pass
Testing the function
import threading
import time
from datetime import datetime, timedelta

lock = threading.RLock()  # so prints don't get jumbled

def bomb(myevent, sleep_s):
    '''set event after sleep_s seconds'''
    with lock:
        print('explodes in ', datetime.now() + timedelta(seconds=sleep_s))
    time.sleep(sleep_s)
    myevent.set()
    with lock:
        print('BOOM!')

a = threading.Event()
b = threading.Event()
t_pool = ThreadPoolExecutor(max_workers=2)

threading.Thread(target=bomb, args=(a, 5), daemon=True).start()
threading.Thread(target=bomb, args=(b, 120), daemon=True).start()

with lock:
    print('1 second timeout, no ThreadPool', datetime.now())
wait_for_either([a, b], timeout=1)
with lock:
    print('wait_event_or done', datetime.now())
print('=' * 15)
with lock:
    print('wait for event a', datetime.now())
wait_for_either([a, b], t_pool=t_pool)
with lock:
    print('wait_event_or done', datetime.now())
Starting extra threads seems a clear solution, though not a very efficient one.
The function wait_events below will block until any one of the events is set.
def wait_events(*events):
    event_share = Event()

    def set_event_share(event):
        event.wait()
        event.clear()
        event_share.set()

    for event in events:
        # Pass the function and its argument; calling it here would block.
        Thread(target=set_event_share, args=(event,)).start()
    event_share.wait()

wait_events(event1, event2, event3)
Extending Claudiu's answer, where you can wait for either:
event 1 OR event 2
event 1 AND event 2
from threading import Thread, Event, _Event

class ConditionalEvent(_Event):
    def __init__(self, events_list, condition):
        _Event.__init__(self)
        self.event_list = events_list
        self.condition = condition
        for e in events_list:
            self._setup(e, self._state_changed)
        self._state_changed()

    def _state_changed(self):
        bools = [e.is_set() for e in self.event_list]
        if self.condition == 'or':
            if any(bools):
                self.set()
            else:
                self.clear()
        elif self.condition == 'and':
            if all(bools):
                self.set()
            else:
                self.clear()

    def _custom_set(self, e):
        e._set()
        e._state_changed()

    def _custom_clear(self, e):
        e._clear()
        e._state_changed()

    def _setup(self, e, changed_callback):
        e._set = e.set
        e._clear = e.clear
        e._state_changed = changed_callback
        e.set = lambda: self._custom_set(e)
        e.clear = lambda: self._custom_clear(e)
Example usage will be very similar to before:
import time
e1 = Event()
e2 = Event()
# Example to wait for triggering of event 1 OR event 2
or_e = ConditionalEvent([e1, e2], 'or')
# Example to wait for triggering of event 1 AND event 2
and_e = ConditionalEvent([e1, e2], 'and')
Not pretty, but you can use two additional threads to multiplex the events...
def wait_for_either(a, b):
    flag = threading.Event()  # the shared "either one fired" signal

    class Event_Waiter(threading.Thread):
        def __init__(self, event):
            super(Event_Waiter, self).__init__()
            self.e = event

        def run(self):
            self.e.wait()
            flag.set()

    a_thread = Event_Waiter(a)
    b_thread = Event_Waiter(b)
    a_thread.start()
    b_thread.start()
    flag.wait()
Note, you may have to worry about accidentally getting both events if they arrive too quickly. The helper threads (a_thread and b_thread) should synchronize (e.g. with a lock) around setting flag, and the winner should then stop the other waiter (possibly resetting that thread's event if it was consumed).
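A sketch of that first-wins guard (my own illustration, with hypothetical names): the first waiter to fire takes a lock, records itself, and sets the shared event, so the caller can tell which event won and ignore the latecomer.

import threading

def wait_for_either(a, b):
    flag = threading.Event()
    winner = []  # records which event fired first
    guard = threading.Lock()

    def waiter(name, e):
        e.wait()
        with guard:  # only the first arrival records itself and sets the flag
            if not winner:
                winner.append(name)
                flag.set()

    threading.Thread(target=waiter, args=('a', a), daemon=True).start()
    threading.Thread(target=waiter, args=('b', b), daemon=True).start()
    flag.wait()
    return winner[0]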
def wait_for_event_timeout(*events):
    while not all([e.isSet() for e in events]):
        # Check to see if the event is set. Timeout 1 sec.
        ev_wait_bool = [e.wait(1) for e in events]
        # Process if all events are set. Change all to any to process if any event set
        if all(ev_wait_bool):
            logging.debug('processing event')
        else:
            logging.debug('doing other work')

e1 = threading.Event()
e2 = threading.Event()
t3 = threading.Thread(name='non-block-multi',
                      target=wait_for_event_timeout,
                      args=(e1, e2))
t3.start()
logging.debug('Waiting before calling Event.set()')
time.sleep(5)
e1.set()
time.sleep(10)
e2.set()
logging.debug('Event is set')

How to use multiprocessing queue in Python?

I'm having much trouble trying to understand just how the multiprocessing queue works in Python and how to implement it. Let's say I have two Python modules that access data from a shared file; let's call these two modules a writer and a reader. My plan is to have both the reader and writer put requests into two separate multiprocessing queues, and then have a third process pop these requests in a loop and execute them as such.
My main problem is that I really don't know how to implement multiprocessing.Queue correctly. You cannot really instantiate the object for each process, since they will be separate queues. How do you make sure that all processes relate to a shared queue (or in this case, queues)?
This is a simple example of a reader and writer sharing a single queue... The writer sends a bunch of integers to the reader; when the writer runs out of numbers, it sends 'DONE', which lets the reader know to break out of the read loop.
You can spawn as many reader processes as you like...
from multiprocessing import Process, Queue
import time
import sys

def reader_proc(queue):
    """Read from the queue; this spawns as a separate Process"""
    while True:
        msg = queue.get()  # Read from the queue and do nothing
        if msg == "DONE":
            break

def writer(count, num_of_reader_procs, queue):
    """Write integers into the queue. A reader_proc() will read them from the queue"""
    for ii in range(0, count):
        queue.put(ii)  # Put 'count' numbers into queue
    ### Tell all readers to stop...
    for ii in range(0, num_of_reader_procs):
        queue.put("DONE")

def start_reader_procs(qq, num_of_reader_procs):
    """Start the reader processes and return all in a list to the caller"""
    all_reader_procs = list()
    for ii in range(0, num_of_reader_procs):
        ### reader_proc() reads from qq as a separate process...
        ### you can spawn as many reader_proc() as you like
        ### however, there is usually a point of diminishing returns
        reader_p = Process(target=reader_proc, args=(qq,))
        reader_p.daemon = True
        reader_p.start()  # Launch reader_p() as another proc
        all_reader_procs.append(reader_p)
    return all_reader_procs

if __name__ == "__main__":
    num_of_reader_procs = 2
    qq = Queue()  # writer() writes to qq from _this_ process
    for count in [10**4, 10**5, 10**6]:
        assert 0 < num_of_reader_procs < 4
        all_reader_procs = start_reader_procs(qq, num_of_reader_procs)
        writer(count, len(all_reader_procs), qq)  # Queue stuff to all reader_p()
        print("All reader processes are pulling numbers from the queue...")
        _start = time.time()
        for idx, a_reader_proc in enumerate(all_reader_procs):
            print("    Waiting for reader_p.join() index %s" % idx)
            a_reader_proc.join()  # Wait for a_reader_proc() to finish
            print("        reader_p() idx:%s is done" % idx)
        print(
            "Sending {0} integers through Queue() took {1} seconds".format(
                count, (time.time() - _start)
            )
        )
        print("")
Here's a dead simple usage of multiprocessing.Queue and multiprocessing.Process that allows callers to send an "event" plus arguments to a separate process that dispatches the event to a "do_" method on the process. (Python 3.4+)
import multiprocessing as mp
import collections

Msg = collections.namedtuple('Msg', ['event', 'args'])

class BaseProcess(mp.Process):
    """A process backed by an internal queue for simple one-way message passing.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.queue = mp.Queue()

    def send(self, event, *args):
        """Puts the event and args as a `Msg` on the queue
        """
        msg = Msg(event, args)
        self.queue.put(msg)

    def dispatch(self, msg):
        event, args = msg
        handler = getattr(self, "do_%s" % event, None)
        if not handler:
            raise NotImplementedError("Process has no handler for [%s]" % event)
        handler(*args)

    def run(self):
        while True:
            msg = self.queue.get()
            self.dispatch(msg)
Usage:
class MyProcess(BaseProcess):
    def do_helloworld(self, arg1, arg2):
        print(arg1, arg2)

if __name__ == "__main__":
    process = MyProcess()
    process.start()
    process.send('helloworld', 'hello', 'world')
The send happens in the parent process, the do_* happens in the child process.
I left out any exception handling that would obviously interrupt the run loop and exit the child process. You can also customize it by overriding run to control blocking or whatever else.
This is really only useful in situations where you have a single worker process, but I think it's a relevant answer to this question to demonstrate a common scenario with a little more object-orientation.
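One possible extension (a sketch of my own, not part of the original answer): a sentinel message gives the child a clean way to leave the run loop, which also covers the graceful-exit case left out above.

class StoppableProcess(BaseProcess):
    _STOP = '__stop__'  # hypothetical sentinel value

    def stop(self):
        self.queue.put(self._STOP)

    def run(self):
        while True:
            msg = self.queue.get()
            if msg == self._STOP:  # sentinel: exit instead of dispatching
                break
            self.dispatch(msg)

The parent would then call process.stop() followed by process.join().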
I had a look at multiple answers across Stack Overflow and the web while trying to set up a way of doing multiprocessing using queues for passing around large pandas dataframes. It seemed to me that every answer was re-iterating the same kind of solution without any consideration of the multitude of edge cases one will definitely come across when setting up calculations like these. The problem is that there are many things at play at the same time: the number of tasks, the number of workers, the duration of each task, and possible exceptions during task execution. All of these make synchronization tricky, and most answers do not address how you can go about it. So this is my take after fiddling around for a few hours; hopefully it will be generic enough for most people to find it useful.
Some thoughts before any coding examples. Since queue.Empty, queue.qsize(), and all similar methods are unreliable for flow control, any code like the following
while True:
    try:
        task = pending_queue.get_nowait()
    except queue.Empty:
        break
is bogus. This will kill the worker even if milliseconds later another task turns up in the queue. The worker will not recover and after a while ALL the workers will disappear as they randomly find the queue momentarily empty. The end result will be that the main multiprocessing function (the one with the join() on the processes) will return without all the tasks having completed. Nice. Good luck debugging through that if you have thousands of tasks and a few are missing.
The other issue is the use of sentinel values. Many people have suggested adding a sentinel value to the queue to flag the end of the queue. But to flag it to whom exactly? If there are N workers, N being roughly the number of cores available, then a single sentinel value will only flag the end of the queue to one worker. All the other workers will sit waiting for more work when there is none left. Typical examples I've seen are
while True:
    task = pending_queue.get()
    if task == SOME_SENTINEL_VALUE:
        break
One worker will get the sentinel value while the rest will wait indefinitely. No post I came across mentioned that you need to submit the sentinel value to the queue AT LEAST as many times as you have workers so that ALL of them get it.
The other issue is the handling of exceptions during task execution. Again, these should be caught and managed. Moreover, if you have a completed_tasks queue, you should independently count, in a deterministic way, how many items are in the queue before you decide that the job is done. Again, relying on queue sizes is bound to fail and return unexpected results.
In the example below, the par_proc() function will receive a list of tasks including the functions with which these tasks should be executed alongside any named arguments and values.
import multiprocessing as mp
import dill as pickle
import queue
import time
import psutil

SENTINEL = None

def do_work(tasks_pending, tasks_completed):
    # Get the current worker's name
    worker_name = mp.current_process().name
    while True:
        try:
            task = tasks_pending.get_nowait()
        except queue.Empty:
            print(worker_name + ' found an empty queue. Sleeping for a while before checking again...')
            time.sleep(0.01)
        else:
            try:
                if task == SENTINEL:
                    print(worker_name + ' no more work left to be done. Exiting...')
                    break
                print(worker_name + ' received some work... ')
                time_start = time.perf_counter()
                work_func = pickle.loads(task['func'])
                result = work_func(**task['task'])
                tasks_completed.put({work_func.__name__: result})
                time_end = time.perf_counter() - time_start
                print(worker_name + ' done in {} seconds'.format(round(time_end, 5)))
            except Exception as e:
                print(worker_name + ' task failed. ' + str(e))
                tasks_completed.put({work_func.__name__: None})

def par_proc(job_list, num_cpus=None):
    # Get the number of cores
    if not num_cpus:
        num_cpus = psutil.cpu_count(logical=False)
    print('* Parallel processing')
    print('* Running on {} cores'.format(num_cpus))
    # Set-up the queues for sending and receiving data to/from the workers
    tasks_pending = mp.Queue()
    tasks_completed = mp.Queue()
    # Gather processes and results here
    processes = []
    results = []
    # Count tasks
    num_tasks = 0
    # Add the tasks to the queue
    for job in job_list:
        for task in job['tasks']:
            expanded_job = {}
            num_tasks = num_tasks + 1
            expanded_job.update({'func': pickle.dumps(job['func'])})
            expanded_job.update({'task': task})
            tasks_pending.put(expanded_job)
    # Use as many workers as there are cores (usually chokes the system so better use less)
    num_workers = num_cpus
    # We need as many sentinels as there are worker processes so that ALL processes exit when there is no more
    # work left to be done.
    for c in range(num_workers):
        tasks_pending.put(SENTINEL)
    print('* Number of tasks: {}'.format(num_tasks))
    # Set-up and start the workers
    for c in range(num_workers):
        p = mp.Process(target=do_work, args=(tasks_pending, tasks_completed))
        p.name = 'worker' + str(c)
        processes.append(p)
        p.start()
    # Gather the results
    completed_tasks_counter = 0
    while completed_tasks_counter < num_tasks:
        results.append(tasks_completed.get())
        completed_tasks_counter = completed_tasks_counter + 1
    for p in processes:
        p.join()
    return results
And here is a test to run the above code against
def test_parallel_processing():
    def heavy_duty1(arg1, arg2, arg3):
        return arg1 + arg2 + arg3

    def heavy_duty2(arg1, arg2, arg3):
        return arg1 * arg2 * arg3

    task_list = [
        {'func': heavy_duty1, 'tasks': [{'arg1': 1, 'arg2': 2, 'arg3': 3}, {'arg1': 1, 'arg2': 3, 'arg3': 5}]},
        {'func': heavy_duty2, 'tasks': [{'arg1': 1, 'arg2': 2, 'arg3': 3}, {'arg1': 1, 'arg2': 3, 'arg3': 5}]},
    ]
    results = par_proc(task_list)
    job1 = sum([y for x in results if 'heavy_duty1' in x.keys() for y in list(x.values())])
    job2 = sum([y for x in results if 'heavy_duty2' in x.keys() for y in list(x.values())])
    assert job1 == 15
    assert job2 == 21
plus another one with some exceptions
def test_parallel_processing_exceptions():
    def heavy_duty1_raises(arg1, arg2, arg3):
        raise ValueError('Exception raised')
        return arg1 + arg2 + arg3

    def heavy_duty2(arg1, arg2, arg3):
        return arg1 * arg2 * arg3

    task_list = [
        {'func': heavy_duty1_raises, 'tasks': [{'arg1': 1, 'arg2': 2, 'arg3': 3}, {'arg1': 1, 'arg2': 3, 'arg3': 5}]},
        {'func': heavy_duty2, 'tasks': [{'arg1': 1, 'arg2': 2, 'arg3': 3}, {'arg1': 1, 'arg2': 3, 'arg3': 5}]},
    ]
    results = par_proc(task_list)
    job1 = sum([y for x in results if 'heavy_duty1' in x.keys() for y in list(x.values())])
    job2 = sum([y for x in results if 'heavy_duty2' in x.keys() for y in list(x.values())])
    assert not job1
    assert job2 == 21
Hope that is helpful.
in "from queue import Queue" there is no module called queue, instead multiprocessing should be used. Therefore, it should look like "from multiprocessing import Queue"
Just made a simple and general example demonstrating passing a message over a Queue between two standalone programs. It doesn't directly answer the OP's question, but it should be clear enough to indicate the concept.
Server:
multiprocessing-queue-manager-server.py
import asyncio
import concurrent.futures
import multiprocessing
import multiprocessing.managers
import queue
import sys
import threading
from typing import Any, AnyStr, Dict, Union

class QueueManager(multiprocessing.managers.BaseManager):
    def get_queue(self, ident: Union[AnyStr, int, type(None)] = None) -> multiprocessing.Queue:
        pass

def get_queue(ident: Union[AnyStr, int, type(None)] = None) -> multiprocessing.Queue:
    global q
    if not ident in q:
        q[ident] = multiprocessing.Queue()
    return q[ident]

q: Dict[Union[AnyStr, int, type(None)], multiprocessing.Queue] = dict()
delattr(QueueManager, 'get_queue')

def init_queue_manager_server():
    if not hasattr(QueueManager, 'get_queue'):
        QueueManager.register('get_queue', get_queue)

def serve(no: int, term_ev: threading.Event):
    manager: QueueManager
    with QueueManager(authkey=QueueManager.__name__.encode()) as manager:
        print(f"Server address {no}: {manager.address}")
        while not term_ev.is_set():
            try:
                item: Any = manager.get_queue().get(timeout=0.1)
                print(f"Client {no}: {item} from {manager.address}")
            except queue.Empty:
                continue

async def main(n: int):
    init_queue_manager_server()
    term_ev: threading.Event = threading.Event()
    executor: concurrent.futures.ThreadPoolExecutor = concurrent.futures.ThreadPoolExecutor()
    i: int
    for i in range(n):
        asyncio.ensure_future(asyncio.get_running_loop().run_in_executor(executor, serve, i, term_ev))
    # Gracefully shut down
    try:
        await asyncio.get_running_loop().create_future()
    except asyncio.CancelledError:
        term_ev.set()
        executor.shutdown()
        raise

if __name__ == '__main__':
    asyncio.run(main(int(sys.argv[1])))
Client:
multiprocessing-queue-manager-client.py
import multiprocessing
import multiprocessing.managers
import os
import sys
from typing import AnyStr, Union

class QueueManager(multiprocessing.managers.BaseManager):
    def get_queue(self, ident: Union[AnyStr, int, type(None)] = None) -> multiprocessing.Queue:
        pass

delattr(QueueManager, 'get_queue')

def init_queue_manager_client():
    if not hasattr(QueueManager, 'get_queue'):
        QueueManager.register('get_queue')

def main():
    init_queue_manager_client()
    manager: QueueManager = QueueManager(sys.argv[1], authkey=QueueManager.__name__.encode())
    manager.connect()
    message = f"A message from {os.getpid()}"
    print(f"Message to send: {message}")
    manager.get_queue().put(message)

if __name__ == '__main__':
    main()
Usage
Server:
$ python3 multiprocessing-queue-manager-server.py N
N is an integer indicating how many servers should be created. Copy one of the <server-address-N> lines output by the server and make it the first argument of each multiprocessing-queue-manager-client.py invocation.
Client:
python3 multiprocessing-queue-manager-client.py <server-address-1>
Result
Server:
Client 1: <item> from <server-address-1>
Gist: https://gist.github.com/89062d639e40110c61c2f88018a8b0e5
UPD: Created a package here.
Server:
import ipcq
with ipcq.QueueManagerServer(address=ipcq.Address.AUTO, authkey=ipcq.AuthKey.AUTO) as server:
    server.get_queue().get()
Client:
import ipcq
client = ipcq.QueueManagerClient(address=ipcq.Address.AUTO, authkey=ipcq.AuthKey.AUTO)
client.get_queue().put('a message')
We implemented two versions of this: first, a simple multi-thread pool that can execute many types of callables, making our lives much easier, and second, a version that uses processes, which is less flexible in terms of callables and requires an extra call to dill.
Setting frozen_pool to true will freeze execution until finish_pool_queue is called in either class.
Thread Version:
'''
Created on Nov 4, 2019

@author: Kevin
'''
from threading import Lock, Thread
from Queue import Queue
import traceback
from helium.loaders.loader_retailers import print_info
from time import sleep
import signal
import os

class ThreadPool(object):
    def __init__(self, queue_threads, *args, **kwargs):
        self.frozen_pool = kwargs.get('frozen_pool', False)
        self.print_queue = kwargs.get('print_queue', True)
        self.pool_results = []
        self.lock = Lock()
        self.queue_threads = queue_threads
        self.queue = Queue()
        self.threads = []
        for i in range(self.queue_threads):
            t = Thread(target=self.make_pool_call)
            t.daemon = True
            t.start()
            self.threads.append(t)

    def make_pool_call(self):
        while True:
            if self.frozen_pool:
                # print '--> Queue is frozen'
                sleep(1)
                continue
            item = self.queue.get()
            if item is None:
                break
            call = item.get('call', None)
            args = item.get('args', [])
            kwargs = item.get('kwargs', {})
            keep_results = item.get('keep_results', False)
            try:
                result = call(*args, **kwargs)
                if keep_results:
                    self.lock.acquire()
                    self.pool_results.append((item, result))
                    self.lock.release()
            except Exception as e:
                self.lock.acquire()
                print e
                traceback.print_exc()
                self.lock.release()
                os.kill(os.getpid(), signal.SIGUSR1)
            self.queue.task_done()

    def finish_pool_queue(self):
        self.frozen_pool = False
        while self.queue.unfinished_tasks > 0:
            if self.print_queue:
                print_info('--> Thread pool... %s' % self.queue.unfinished_tasks)
            sleep(5)
        self.queue.join()
        for i in range(self.queue_threads):
            self.queue.put(None)
        for t in self.threads:
            t.join()
        del self.threads[:]

    def get_pool_results(self):
        return self.pool_results

    def clear_pool_results(self):
        del self.pool_results[:]
Process Version:
'''
Created on Nov 4, 2019

@author: Kevin
'''
import traceback
from helium.loaders.loader_retailers import print_info
from time import sleep
import signal
import os
from multiprocessing import Queue, Process, Value, Array, JoinableQueue, Lock,\
     RawArray, Manager
from dill import dill
import ctypes
from helium.misc.utils import ignore_exception
from mem_top import mem_top
import gc

class ProcessPool(object):
    def __init__(self, queue_processes, *args, **kwargs):
        self.frozen_pool = Value(ctypes.c_bool, kwargs.get('frozen_pool', False))
        self.print_queue = kwargs.get('print_queue', True)
        self.manager = Manager()
        self.pool_results = self.manager.list()
        self.queue_processes = queue_processes
        self.queue = JoinableQueue()
        self.processes = []
        for i in range(self.queue_processes):
            p = Process(target=self.make_pool_call)
            p.start()
            self.processes.append(p)
        print 'Processes', self.queue_processes

    def make_pool_call(self):
        while True:
            if self.frozen_pool.value:
                sleep(1)
                continue
            item_pickled = self.queue.get()
            if item_pickled is None:
                # print '--> Ending'
                self.queue.task_done()
                break
            item = dill.loads(item_pickled)
            call = item.get('call', None)
            args = item.get('args', [])
            kwargs = item.get('kwargs', {})
            keep_results = item.get('keep_results', False)
            try:
                result = call(*args, **kwargs)
                if keep_results:
                    self.pool_results.append(dill.dumps((item, result)))
                else:
                    del call, args, kwargs, keep_results, item, result
            except Exception as e:
                print e
                traceback.print_exc()
                os.kill(os.getpid(), signal.SIGUSR1)
            self.queue.task_done()

    def finish_pool_queue(self, callable=None):
        self.frozen_pool.value = False
        while self.queue._unfinished_tasks.get_value() > 0:
            if self.print_queue:
                print_info('--> Process pool... %s' % (self.queue._unfinished_tasks.get_value()))
            if callable:
                callable()
            sleep(5)
        for i in range(self.queue_processes):
            self.queue.put(None)
        self.queue.join()
        self.queue.close()
        for p in self.processes:
            with ignore_exception: p.join(10)
            with ignore_exception: p.terminate()
        with ignore_exception: del self.processes[:]

    def get_pool_results(self):
        return self.pool_results

    def clear_pool_results(self):
        del self.pool_results[:]

def test(eg):
    print 'EG', eg
Call with either:
tp = ThreadPool(queue_threads=2)
tp.queue.put({'call': test, 'args': [random.randint(0, 100)]})
tp.finish_pool_queue()
or
pp = ProcessPool(queue_processes=2)
pp.queue.put(dill.dumps({'call': test, 'args': [random.randint(0, 100)]}))
pp.queue.put(dill.dumps({'call': test, 'args': [random.randint(0, 100)]}))
pp.finish_pool_queue()
A multi-producer, multi-consumer example, verified. It should be easy to modify it to cover other cases: single/multiple producers, single/multiple consumers.
from multiprocessing import Process, JoinableQueue
import time
import os

q = JoinableQueue()

def producer():
    for item in range(30):
        time.sleep(2)
        q.put(item)
    pid = os.getpid()
    print(f'producer {pid} done')

def worker():
    while True:
        item = q.get()
        pid = os.getpid()
        print(f'pid {pid} Working on {item}')
        print(f'pid {pid} Finished {item}')
        q.task_done()

for i in range(5):
    Process(target=worker, daemon=True).start()

# two producers, each sending thirty task requests to the workers
producers = []
for i in range(2):
    p = Process(target=producer)
    producers.append(p)
    p.start()

# make sure producers are done
for p in producers:
    p.join()

# block until all workers are done
q.join()
print('All work completed')
Explanation:
Two producers and five consumers in this example.
JoinableQueue is used to make sure all elements stored in the queue will be processed. 'task_done' is how a worker notifies that an element is done. 'q.join()' will wait until all elements are marked as done.
With point 2, there is no need to join every worker.
But it is important to join every producer, so that all its elements get stored into the queue; otherwise the program exits immediately.

kill a function after a certain time in windows

I've read a lot of posts about using threads, subprocesses, etc. A lot of it seems overcomplicated for what I'm trying to do...
All I want to do is stop executing a function after X amount of time has elapsed.
def big_loop(bob):
    x = bob
    start = time.time()
    while True:
        print time.time() - start
This function is an endless loop that never throws any errors or exceptions, period.
I"m not sure the difference between "commands, shells, subprocesses, threads, etc.." and this function, which is why I'm having trouble manipulating subprocesses.
I found this code here, and tried it but as you can see it keeps printing after 10 seconds have elapsed:
import time
import threading
import subprocess as sub

class RunCmd(threading.Thread):
    def __init__(self, cmd, timeout):
        threading.Thread.__init__(self)
        self.cmd = cmd
        self.timeout = timeout

    def run(self):
        self.p = sub.Popen(self.cmd)
        self.p.wait()

    def Run(self):
        self.start()
        self.join(self.timeout)
        if self.is_alive():
            self.p.terminate()
            self.join()

def big_loop(bob):
    x = bob
    start = time.time()
    while True:
        print time.time() - start

RunCmd(big_loop('jimijojo'), 10).Run()  # supposed to quit after 10 seconds, but doesn't
x = raw_input('DONEEEEEEEEEEEE')
What's a simple way this function can be killed? As you can see in my attempt above, it doesn't terminate after 10 seconds and just keeps on going...
Oh, also: I've read about using signal, but I'm on Windows so I can't use the alarm feature (Python 2.7).
Assume the "infinitely running function" can't be manipulated or changed to be non-infinite; if I could change the function, I'd just change it to be non-infinite, wouldn't I?
Here are some similar questions, which I haven't able to port over their code to work with my simple function:
Perhaps you can?
Python: kill or terminate subprocess when timeout
signal.alarm replacement in Windows [Python]
Ok, I tried an answer I received, and it works... but how can I use it if I remove the if __name__ == "__main__": statement? When I remove this statement, the loop never ends as it did before...
import multiprocessing
import Queue
import time

def infinite_loop_function(bob):
    var = bob
    start = time.time()
    while True:
        time.sleep(1)
        print time.time() - start
    print 'this statement will never print'

def wrapper(queue, bob):
    result = infinite_loop_function(bob)
    queue.put(result)
    queue.close()

#if __name__ == "__main__":
queue = multiprocessing.Queue(1)  # Maximum size is 1
proc = multiprocessing.Process(target=wrapper, args=(queue, 'var'))
proc.start()

# Wait for TIMEOUT seconds
try:
    timeout = 10
    result = queue.get(True, timeout)
except Queue.Empty:
    # Deal with lack of data somehow
    result = None
finally:
    proc.terminate()

print 'running other code, now that that infinite loop has been defeated!'
print 'bla bla bla'
x = raw_input('done')
Use the building blocks in the multiprocessing module:
import multiprocessing
import Queue

TIMEOUT = 5

def big_loop(bob):
    import time
    time.sleep(4)
    return bob * 2

def wrapper(queue, bob):
    result = big_loop(bob)
    queue.put(result)
    queue.close()

def run_loop_with_timeout():
    bob = 21  # Whatever sensible value you need
    queue = multiprocessing.Queue(1)  # Maximum size is 1
    proc = multiprocessing.Process(target=wrapper, args=(queue, bob))
    proc.start()

    # Wait for TIMEOUT seconds
    try:
        result = queue.get(True, TIMEOUT)
    except Queue.Empty:
        # Deal with lack of data somehow
        result = None
    finally:
        proc.terminate()

    # Process data here, not in try block above, otherwise your process keeps running
    print result

if __name__ == "__main__":
    run_loop_with_timeout()
You could also accomplish this with a Pipe/Connection pair, but I'm not familiar with their API. Change the sleep time or TIMEOUT to check the behaviour for either case.
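For what it's worth, here is a sketch of the Pipe variant (my own illustration; Connection.poll(timeout) does the timed waiting). The pipe_wrapper helper is hypothetical and mirrors wrapper() above:

import multiprocessing

def pipe_wrapper(conn, bob):
    conn.send(big_loop(bob))  # big_loop as defined above
    conn.close()

def run_loop_with_pipe_timeout():
    parent_conn, child_conn = multiprocessing.Pipe()
    proc = multiprocessing.Process(target=pipe_wrapper, args=(child_conn, 21))
    proc.start()
    if parent_conn.poll(TIMEOUT):  # wait up to TIMEOUT seconds for data
        result = parent_conn.recv()
    else:
        result = None              # timed out
    proc.terminate()
    print(result)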
There is no straightforward way to kill a function after a certain amount of time without running the function in a separate process. A better approach would probably be to rewrite the function so that it returns after a specified time:
import time

def big_loop(bob, timeout):
    x = bob
    start = time.time()
    end = start + timeout
    while time.time() < end:
        print time.time() - start
        # Do more stuff here as needed
Can't you just return from the loop?
start = time.time()
endt = start + 30
while True:
    now = time.time()
    if now > endt:
        return
    else:
        print now - start
import os, signal, time

cpid = os.fork()  # note: os.fork() is POSIX-only and not available on Windows
if cpid == 0:
    while True:
        pass  # do stuff
else:
    time.sleep(10)
    os.kill(cpid, signal.SIGKILL)
You can also check in the loop of a thread for an event, which is more portable and flexible as it allows other reactions than brute killing. However, this approach fails if # do stuff can take time (or even wait forever on some event).
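A sketch of that event-checked loop (my own illustration):

import threading
import time

stop = threading.Event()

def big_loop(bob):
    start = time.time()
    while not stop.is_set():  # checked on every iteration
        print(time.time() - start)
        time.sleep(0.5)

t = threading.Thread(target=big_loop, args=('jimijojo',))
t.start()
time.sleep(10)   # let it run for 10 seconds
stop.set()       # ask the loop to finish itself
t.join()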
