I am using threads to check a header status code from an API url. How can i break loop/stop all other threads if condition is true. Please check following code..
import logging, time, threading, requests
#: Log items
logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s', level=logging.INFO)
class EppThread(threading.Thread):
def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, verbose=None):
threading.Thread.__init__(self, group=group, target=target, name=name, verbose=verbose)
self.args = args
def run(self):
startTime = time.time()
url = self.args[0]
limit = self.args[1]
for i in range(limit):
response = requests.get(url)
if response.status_code != 200:
break
#Exit other similar threads (with same url)
else:
print('Thread {0} - success'.format(thread.getName()))
print('process completed')
# Send Email
number_of_threads = 5
number_of_requests = 100
urls = ['https://v1.api.com/example', 'https://v2.api.com/example']
if __name__ == '__main__':
startTime = time.time()
for url in urls:
threads = []
for i in range(number_of_threads):
et = EppThread(name = "{0}-Thread-{1}".format(name, i + 1), args=(url, number_of_requests))
threads.append(et)
et.start()
# Check if execution time is not greater than 1 minute
while len(threads) > 0 and (time.time() - startTime) < 60:
time.sleep(0.5)
for thread in threads:
if not thread.isAlive():
threads.remove(thread)
print('Thread {0} terminated'.format(thread.getName()))
os._exit(1)
Please suggest some better ways that stops code execution if condition gets true in any running thread.
Thanks for your help.
An important thing to note here is that when the run method of a Thread is complete, the Thread is set to dead and garbage collected. So all we really need is a boolean class variable that breaks that loop. Class variables are the same for all objects instantiated from that class and subclasses; so once we set it, all of the objects in our class will act the same way:
import logging, time, threading, requests
#: Log items
logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s', level=logging.INFO)
class EppThread(threading.Thread):
kill = False # new Boolean class variable
url = 'https://v1.api.com/example' # keep this in mind for later
def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, verbose=None):
threading.Thread.__init__(self, group=group, target=target, name=name, verbose=verbose)
self.args = args
def run(self):
limit = self.args[0]
for i in range(limit):
response = requests.get(self.url)
if response.status_code != 200:
self.kill = True # ends this loop on all Threads since it's changing a class variable
else:
print('Thread {0} - success'.format(self.getName())) # changed to self.getName()
if self.kill: # if kill is True, break the loop, send the email, and finish the Thread
break
print('process completed')
# Send Email
number_of_threads = 5
number_of_requests = 100
if __name__ == '__main__':
startTime = time.time()
threads = []
for i in range(number_of_threads):
et = EppThread(name="{0}-Thread-{1}".format(name, i + 1), args=(number_of_requests))
threads.append(et)
et.start()
# Check if execution time is not greater than 1 minute
while threads and time.time() - startTime < 60: # removed len() due to implicit Falsiness of empty containers in Python
time.sleep(0.5)
for thread in threads:
if not thread.isAlive():
threads.remove(thread)
print('Thread {0} terminated'.format(thread.getName()))
EppThread.kill = True
Now when any of the EppThreads has a bad connection it sets the class variable to True, which makes all of the other EppThreads break the loop as well. I also added EppThread.kill = True at the end so it'll break the request loops more cleanly if you exceed 1 minute run time.
Lastly, I added the url class variable. This is because you expressed interest in running different urls simultaneously and only kill the ones that specifically have a bad connection. All you have to do at this point is subclass EppThread and overwrite kill and url.
class EppThread2(EppThread):
kill = False
url = 'https://v2.example.com/api?$awesome=True'
Then you can instantiate EppThread2 and add it to the threads list and everything should work as you want it to.
You could create an event object that's shared between all your threads that share the same url. When you run into an error in the thread, set the event. Then, in your run loop check for the event. If it has happend, kill the thread by breaking the loop.
Here's a version of your example modified to use the Event.
import logging, time, threading, requests
#: Log items
logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s', level=logging.INFO)
class EppThread(threading.Thread):
def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, verbose=None, bad_status=None):
threading.Thread.__init__(self, group=group, target=target, name=name, verbose=verbose)
self.args = args
self.bad_status = bad_status
def run(self):
startTime = time.time()
url = self.args[0]
limit = self.args[1]
for i in range(limit):
if self.bad_status.is_set():
# break the loop on errors in any thread.
break
response = requests.get(url)
if response.status_code != 200:
# Set the event when an error occurs
self.bad_status.set()
break
#Exit other similar threads (with same url)
else:
print('Thread {0} - success'.format(thread.getName()))
print('process completed')
# Send Email
number_of_threads = 5
number_of_requests = 100
urls = ['https://v1.api.com/example', 'https://v2.api.com/example']
if __name__ == '__main__':
startTime = time.time()
threads = []
for url in urls:
# Create an event for each URL
bad_status = threading.Event()
for i in range(number_of_threads):
et = EppThread(name = "{0}-Thread-{1}".format(name, i + 1), args=(url, number_of_requests), bad_status=bad_status)
threads.append(et)
et.start()
# Check if execution time is not greater than 1 minute
while len(threads) > 0 and (time.time() - startTime) < 60:
time.sleep(0.5)
for thread in threads:
if not thread.isAlive():
threads.remove(thread)
print('Thread {0} terminated'.format(thread.getName()))
os._exit(1)
The threading.Event class works for both threads and processes. So, if at somepoint you wanted to switch to using Process it would "just work".
Import sys
Here is an example:
import sys
list = []
if len(list) < 1:
sys.exit("You don\'t have any items in your list")
Related
This isn't as much of a question as something I'm interested in.
I do quite a bit of asynchronous coding in Python, and there's a bit of code that I'm frequently writing over and over while I'm waiting for threads to stop (if I'm trying to exit cleanly).
while not class_containing_threads.stopped:
pass
else:
do_something()
do_something_else()
do_some_other_thing()
Although I'm sure there's a nice decorator that one can write to make this happen, I'm not too sure how I would go about writing it without ultimately making my code more complicated than it needs to be.
Basically, I wish there were something along the lines of:
when condition:
do_something()
where the thread is effectively halted while we wait for some event to occur.
To demonstrate what I mean, here's some working code that shows how much I actually end up writing the same thing over and over
import threading
import random
import time
class ClassContainingThreads:
def __init__(self):
# Just stating what stuff can be found here
self._coordinating_thread = None
self._worker_thread_1 = None
self._worker_thread_2 = None
self._worker_thread_3 = None
self._stopping = False
self._stopped = False
def run(self):
# Main method to get everything running
self._coordinating_thread = threading.Thread(target=self._run)
self._coordinating_thread.start()
def stop(self):
# Used to stop everything
self._stopping = True
#property
def stopped(self):
# Lets you know when things have stopped
return self._stopped
#property
def all_workers_running(self):
# Lets you know whether all the workers are running
return self._all_workers_are_alive()
def _run(self):
# Coordinating thread getting worker threads to start
self._worker_thread_1 = threading.Thread(
target=self._important_function_1)
self._worker_thread_2 = threading.Thread(
target=self._important_function_2)
self._worker_thread_3 = threading.Thread(
target=self._important_function_3)
self._worker_thread_1.start()
self._worker_thread_2.start()
self._worker_thread_3.start()
# Coincidentally, the block appears here
while not self._stopping:
pass
else:
while self._any_workers_are_alive():
pass
else:
self._stopping = False
self._stopped = True
def _important_function_1(self):
print(f'Thread 1 started')
# Coincidentally, the block appears here
while not self._stopping:
pass
else:
print('Thread 1 received stop signal')
# Emulating some process that takes some unknown time to stop
delay_long = random.random() * 5
delay_start = time.time()
while not (time.time() - delay_start) > delay_long:
pass
else:
print(f'Thread 1 stopped')
def _important_function_2(self):
print(f'Thread 2 started')
# Coincidentally, the block appears here
while not self._stopping:
pass
else:
print('Thread 2 received stop signal')
# Emulating some process that takes some unknown time to stop
delay = random.random() * 5
delay_start = time.time()
while not (time.time() - delay_start) > delay:
pass
else:
print(f'Thread 2 stopped')
def _important_function_3(self):
print(f'Thread 3 started')
# Coincidentally, the block appears here
while not self._stopping:
pass
else:
print('Thread 3 received stop signal')
# Emulating some process that takes some unknown time to stop
delay = random.random() * 5
delay_start = time.time()
while not (time.time() - delay_start) > delay:
pass
else:
print(f'Thread 3 stopped')
def _any_workers_are_alive(self):
# Check whether any workers are alive
if (self._worker_thread_1.is_alive() or
self._worker_thread_2.is_alive() or
self._worker_thread_3.is_alive()):
return True
else:
return False
def _all_workers_are_alive(self):
# Check whether all workers are alive
if (self._worker_thread_1.is_alive() and
self._worker_thread_2.is_alive() and
self._worker_thread_3.is_alive()):
return True
else:
return False
if __name__ == '__main__':
# Just booting everything up
print('Program started')
class_containing_threads = ClassContainingThreads()
class_containing_threads.run()
# Block I'm interested in appears here
while not class_containing_threads.all_workers_running:
pass
else:
# and here
while not input("Type 'exit' to exit > ") == "exit":
pass
else:
class_containing_threads.stop()
# and here
while not class_containing_threads.stopped:
pass
else:
print('Program stopped')
exit() # I know this is pointless here
Also, critiques are welcome.
The pattern of repeatedly checking a flag is a form of busy wait. This is an extremely wasteful pattern, as the task checking the flag will do so very, very often.
Concrete alternatives depend on the concurrency pattern used, but usually come in the form of signals, events or locks – these are generally known as "synchronisation primitives".
For example, threading provides a threading.Event that can be "waited for" and "triggered". The desired operation when condition: is simply event.wait() – this automatically pauses the current thread until the event is triggered. Another thread can trigger this condition via event.set().
Thanks to the feedback, I've rewritten the code snippet into something that uses the threading.Thread.join() method and threading.Event object. It's much simpler now, and hopefully doesn't involve any unintentional busy waiting.
import threading
import random
import time
class ClassContainingThreads:
def __init__(self, blocking_event):
# Just stating what stuff can be found here
self._blocking_event = blocking_event
self._coordinating_thread = None
self._worker_thread_1 = None
self._worker_thread_2 = None
self._worker_thread_3 = None
self._stopping = False
self._stopped = False
def run(self):
# Main method to get everything running
self._coordinating_thread = threading.Thread(target=self._run)
self._coordinating_thread.start()
def stop(self):
# Used to stop everything
self._stopping = True
#property
def stopped(self):
return self._stopped
def _run(self):
# Coordinating thread getting worker threads to start
self._worker_thread_1 = threading.Thread(
target=self._important_function_1)
self._worker_thread_2 = threading.Thread(
target=self._important_function_2)
self._worker_thread_3 = threading.Thread(
target=self._important_function_3)
# Start the workers
self._worker_thread_1.start()
self._worker_thread_2.start()
self._worker_thread_3.start()
# Let main_thread continue when workers have started
self._blocking_event.set()
# Wait for workers to complete
self._worker_thread_1.join()
self._worker_thread_2.join()
self._worker_thread_3.join()
# Once all threads are dead
self._stopping = False
self._stopped = True
self._blocking_event.set()
def _important_function_1(self):
print(f'Thread 1 started')
# Emulating some job being done
while not self._stopping:
pass
else:
print('Thread 1 received stop signal')
# Emulating some process that takes some unknown time to stop
delay_long = random.random() * 5
delay_start = time.time()
while not (time.time() - delay_start) > delay_long:
pass
else:
print(f'Thread 1 stopped')
def _important_function_2(self):
print('Thread 2 started')
# Emulating some job being done
while not self._stopping:
pass
else:
print('Thread 2 received stop signal')
# Emulating some process that takes some unknown time to stop
delay = random.random() * 5
delay_start = time.time()
while not (time.time() - delay_start) > delay:
pass
else:
print(f'Thread 2 stopped')
def _important_function_3(self):
print('Thread 3 started')
# Emulating some job being done
while not self._stopping:
pass
else:
print('Thread 3 received stop signal')
# Emulating some process that takes some unknown time to stop
delay = random.random() * 5
delay_start = time.time()
while not (time.time() - delay_start) > delay:
pass
else:
print(f'Thread 3 stopped')
if __name__ == '__main__':
# Just booting everything up
print('Program started')
blocking_event = threading.Event()
class_containing_threads = ClassContainingThreads(blocking_event)
class_containing_threads.run()
blocking_event.wait()
while not input("Type 'exit' to exit > ") == "exit":
pass
else:
class_containing_threads.stop()
blocking_event.wait()
print('Program stopped')
exit() # I know this is pointless here
How can I start and stop a thread with my poor thread class?
It is in loop, and I want to restart it again at the beginning of the code. How can I do start-stop-restart-stop-restart?
My class:
import threading
class Concur(threading.Thread):
def __init__(self):
self.stopped = False
threading.Thread.__init__(self)
def run(self):
i = 0
while not self.stopped:
time.sleep(1)
i = i + 1
In the main code, I want:
inst = Concur()
while conditon:
inst.start()
# After some operation
inst.stop()
# Some other operation
You can't actually stop and then restart a thread since you can't call its start() method again after its run() method has terminated. However you can make one pause and then later resume its execution by using a threading.Condition variable to avoid concurrency problems when checking or changing its running state.
threading.Condition objects have an associated threading.Lock object and methods to wait for it to be released and will notify any waiting threads when that occurs. Here's an example derived from the code in your question which shows this being done. In the example code I've made the Condition variable a part of Thread subclass instances to better encapsulate the implementation and avoid needing to introduce additional global variables:
from __future__ import print_function
import threading
import time
class Concur(threading.Thread):
def __init__(self):
super(Concur, self).__init__()
self.iterations = 0
self.daemon = True # Allow main to exit even if still running.
self.paused = True # Start out paused.
self.state = threading.Condition()
def run(self):
self.resume()
while True:
with self.state:
if self.paused:
self.state.wait() # Block execution until notified.
# Do stuff...
time.sleep(.1)
self.iterations += 1
def pause(self):
with self.state:
self.paused = True # Block self.
def resume(self):
with self.state:
self.paused = False
self.state.notify() # Unblock self if waiting.
class Stopwatch(object):
""" Simple class to measure elapsed times. """
def start(self):
""" Establish reference point for elapsed time measurements. """
self.start_time = time.time()
return self
#property
def elapsed_time(self):
""" Seconds since started. """
try:
return time.time() - self.start_time
except AttributeError: # Wasn't explicitly started.
self.start_time = time.time()
return 0
MAX_RUN_TIME = 5 # Seconds.
concur = Concur()
stopwatch = Stopwatch()
print('Running for {} seconds...'.format(MAX_RUN_TIME))
concur.start()
while stopwatch.elapsed_time < MAX_RUN_TIME:
concur.resume()
# Can also do other concurrent operations here...
concur.pause()
# Do some other stuff...
# Show Concur thread executed.
print('concur.iterations: {}'.format(concur.iterations))
This is David Heffernan's idea fleshed-out. The example below runs for 1 second, then stops for 1 second, then runs for 1 second, and so on.
import time
import threading
import datetime as DT
import logging
logger = logging.getLogger(__name__)
def worker(cond):
i = 0
while True:
with cond:
cond.wait()
logger.info(i)
time.sleep(0.01)
i += 1
logging.basicConfig(level=logging.DEBUG,
format='[%(asctime)s %(threadName)s] %(message)s',
datefmt='%H:%M:%S')
cond = threading.Condition()
t = threading.Thread(target=worker, args=(cond, ))
t.daemon = True
t.start()
start = DT.datetime.now()
while True:
now = DT.datetime.now()
if (now-start).total_seconds() > 60: break
if now.second % 2:
with cond:
cond.notify()
The implementation of stop() would look like this:
def stop(self):
self.stopped = True
If you want to restart, then you can just create a new instance and start that.
while conditon:
inst = Concur()
inst.start()
#after some operation
inst.stop()
#some other operation
The documentation for Thread makes it clear that the start() method can only be called once for each instance of the class.
If you want to pause and resume a thread, then you'll need to use a condition variable.
I have a queue.Queue that is filled by a thread. A method tries to receive from this queue with a timeout. Now let's say, that another thread can reset the timeout of our queue-waiting, and if the queue is not fed in time, our receiver function should go on, with the updated timeout.
I can achieve this as below, however I had to modify the builtin queue.Queue class, so that the endtime parameter in the get() method can be modified during waiting...
Is there any better solution for this? (I don't want to use asyncio...)
from threading import Thread
from queue import Queue, Empty
import time
q = Queue()
TIMEOUT = 1
RESET_TIME = 0.5
PUT_TIME = 1.2
t0 = time.time()
def receive():
try:
_res = q.get(block=True, timeout=TIMEOUT)
print(f'get # {time.time()-t0}')
return _res
except Empty:
print(f'to # {time.time()-t0}')
return None
def feed_queue():
time.sleep(PUT_TIME)
print(f'put # {time.time()-t0}')
q.put_nowait(42)
def reset_timeout():
time.sleep(RESET_TIME)
with q.mutex:
q.endtime += TIMEOUT
print(f'reset # {time.time()-t0}')
if __name__ == '__main__':
Thread(target=feed_queue).start()
Thread(target=reset_timeout).start()
res = receive()
print('res:', res)
This yields:
reset # 0.5013222694396973
put # 1.201164722442627
get # 1.201164722442627
res: 42
The following modification has been done in queue.py for this to work:
Index: queue.py
===================================================================
--- queue.py (revision 28725)
+++ queue.py (working copy)
## -52,6 +52,7 ##
# drops to zero; thread waiting to join() is notified to resume
self.all_tasks_done = threading.Condition(self.mutex)
self.unfinished_tasks = 0
+ self.endtime = 0
def task_done(self):
'''Indicate that a formerly enqueued task is complete.
## -171,9 +172,9 ##
elif timeout < 0:
raise ValueError("'timeout' must be a non-negative number")
else:
- endtime = time() + timeout
+ self.endtime = time() + timeout
while not self._qsize():
- remaining = endtime - time()
+ remaining = self.endtime - time()
if remaining <= 0.0:
raise Empty
self.not_empty.wait(remaining)
You can create your own class, inheriting Queue and adding the global variable endtime like this:
class Waszil(Queue):
def __init__(self, maxsize=0):
super().__init__(self)
self.maxsize = maxsize
self._init(maxsize)
self.endtime = 0
Then just change q = Queue() to q = Waszil() and you should be good to go.
EDIT:
If you prefer to make inherent thread safety in the Waszil class, you can use threading.Lock like this:
from threading import Lock
class Waszil(Queue):
def __init__(self, maxsize=0):
super().__init__(self)
self.threadLock = Lock()
self.maxsize = maxsize
self._init(maxsize)
self.endtime = 0
def increment_endtime(self):
with self.threadLock:
self.endtime += 1
In this case, instead of your
with q.mutex:
q.endtime += TIMEOUT
you simply call q.increment_endtime()
I want to run a code with process parallel to my main code but also want to access its parameters or start/stop the process via command prompt.
my machine is win7 64bit. Something in mind is:
from multiprocessing import Process
class dllapi():
...
def apiloop(params, args):
apiclient = dllapi(**args)
while True:
apiclient.cycle()
params = [....]
def mainloop(args):
p = Process(target = apiloop, args=(params, args, ))
while True:
cmd = input()
if cmd == 'kill':
p.terminate()
if cmd == 'stop':
pass # no idea
if cmd == 'resume':
pass # no idea
if cmd == 'report':
print (params)
I wish to make it simple. I did tried to make apiloop as thread yet input() could freeze the program and stopped apiloop working until i pressed enter...
To share the parameters from apiloop process, i did try queue and pipe, but, seem to me, queue needs .join to wait until apiloop is done and pipe has buffer limit.
(actually i can make apiclient.cycle runs every 1s but i wish to keep apiclient alive)
I wish to know if it's worth to dig deeper with multiprocessing (e.g. will try manager as well...) or there are other approaches which is more suitable for my case. Thanks in advance...
* UPDATED: 201809170953*
Some progress with manager as below:
from multiprocessing import Process, Manager
class dllapi():
...
class webclientapi():
...
def apiloop(args, cmd, params):
apiclient = dllapi(**args)
status = True
while True:
# command from main
if cmd == 'stop':
status = False
elif cmd == 'start':
status = True
cmd = None
# stop or run
if status == True:
apiclient.cycle()
# update parameters
params['status'] = status
def uploadloop(cmds, params):
uploadclient = webclientapi()
status = True
while True:
# command from main
if cmd == 'stop':
status = False
elif cmd == 'start':
status = True
cmd = None
# stop or run
if status == True:
# upload 'status' from apiclient to somewhere
uploadclient.cycle(params['status'])
def mainloop(args):
manager = Manager()
mpcmds = {}
mpparams = {}
mps = {}
mpcmds ['apiloop'] = manager.Value('u', 'start')
mpparams ['apiloop'] = manager.dict()
mps ['apiloop'] = Process(target = apiloop, args=(args, mpcmds['apiloop'], mpparams['apiloop'])
mpcmds ['uploadloop'] = manager.Value('u', 'start')
# mpparams ['uploadloop'] is directly from mpparams ['apiloop']
mps ['uploadloop'] = Process(target = uploadloop, args=(mpcmds['uploadloop'], mpparams['apiloop'])
for key, mp in mps.items():
mp.daemon = True
mp.start()
while True:
cmd = input().split(' ')
# kill daemon process with exit()
if cmd[0] == 'bye':
exit()
# kill individual process
if cmd[0] == 'kill':
mps[cmd[1]].terminate()
# stop individual process via command
if cmd[0] == 'stop':
mpcmds[cmd[1]] = 'stop'
# stop individual process via command
if cmd[0] == 'start':
mpcmds[cmd[1]] = 'start'
# report individual process info via command
if cmd[0] == 'report':
print (mpparams ['apiloop'])
Hope this'd help someone.
I'm showing you how to solve the general problem with threads only, because that is what you tried first and your example doesn't bring up the need for a child-process.
In the example below your dllapi class is named Zoo and it's subclassing threading.Thread, adding some methods to allow execution control. It takes some data upon initialization and its cycle-method simply iterates repeatedly over this data and just counts how many times it has seen the specific item.
import time
import logging
from queue import Queue
from threading import Thread
from itertools import count, cycle
class Zoo(Thread):
_ids = count(1)
def __init__(self, cmd_queue, data, *args,
log_level=logging.DEBUG, **kwargs):
super().__init__()
self.name = f'{self.__class__.__name__.lower()}-{next(self._ids)}'
self.data = data
self.log_level = log_level
self.args = args
self.kwargs = kwargs
self.logger = self._init_logging()
self.cmd_queue = cmd_queue
self.data_size = len(data)
self.actual_item = None
self.iter_cnt = 0
self.cnt = count(1)
self.cyc = cycle(self.data)
def cycle(self):
item = next(self.cyc)
if next(self.cnt) % self.data_size == 0: # new iteration round
self.iter_cnt += 1
self.actual_item = f'{item}_{self.iter_cnt}'
def run(self):
"""
Run is the main-function in the new thread. Here we overwrite run
inherited from threading.Thread.
"""
while True:
if self.cmd_queue.empty():
self.cycle()
time.sleep(1) # optional heartbeat
else:
self._get_cmd()
self.cmd_queue.task_done() # unblocks prompter
def stop(self):
self.logger.info(f'stopping with actual item: {self.actual_item}')
# do clean up
raise SystemExit
def pause(self):
self.logger.info(f'pausing with actual item: {self.actual_item}')
self.cmd_queue.task_done() # unblocks producer joining the queue
self._get_cmd() # just wait blockingly until next command
def resume(self):
self.logger.info(f'resuming with actual item: {self.actual_item}')
def report(self):
self.logger.info(f'reporting with actual item: {self.actual_item}')
print(f'completed {self.iter_cnt} iterations over data')
def _init_logging(self):
fmt = '[%(asctime)s %(levelname)-8s %(threadName)s' \
' %(funcName)s()] --- %(message)s'
logging.basicConfig(format=fmt, level=self.log_level)
return logging.getLogger()
def _get_cmd(self):
cmd = self.cmd_queue.get()
try:
self.__class__.__dict__[cmd](self)
except KeyError:
print(f'Command `{cmd}` is unknown.')
input is a blocking function. You need to outsource it in a separate thread so it doesn't block your main-thread. In the example below input is wrapped in Prompter, a class subclassing threading.Thread. Prompter passes inputs into a command-queue. This command-queue is read by Zoo.
class Prompter(Thread):
"""Prompt user for command input.
Runs in a separate thread so the main-thread does not block.
"""
def __init__(self, cmd_queue):
super().__init__()
self.cmd_queue = cmd_queue
def run(self):
while True:
cmd = input('prompt> ')
self.cmd_queue.put(cmd)
self.cmd_queue.join() # blocks until consumer calls task_done()
if __name__ == '__main__':
data = ['ape', 'bear', 'cat', 'dog', 'elephant', 'frog']
cmd_queue = Queue()
prompter = Prompter(cmd_queue=cmd_queue)
prompter.daemon = True
zoo = Zoo(cmd_queue=cmd_queue, data=data)
prompter.start()
zoo.start()
Example session in terminal:
$python control_thread_over_prompt.py
prompt> report
[2018-09-16 17:59:16,856 INFO zoo-1 report()] --- reporting with actual item: dog_0
completed 0 iterations over data
prompt> pause
[2018-09-16 17:59:26,864 INFO zoo-1 pause()] --- pausing with actual item: bear_2
prompt> resume
[2018-09-16 17:59:33,291 INFO zoo-1 resume()] --- resuming with actual item: bear_2
prompt> report
[2018-09-16 17:59:38,296 INFO zoo-1 report()] --- reporting with actual item: ape_3
completed 3 iterations over data
prompt> stop
[2018-09-16 17:59:42,301 INFO zoo-1 stop()] --- stopping with actual item: elephant_3
I am working on a scraper that rotates the ips, i have created a small mvp in a notebook that works as expected:
import logging
import time
import random
import threading
from datetime import datetime
from datetime import timedelta
logging.basicConfig(
level=logging.DEBUG,
format='(%(threadName)-10s) %(message)s',
)
class Controller(object):
def __init__(self, event):
self.start_time = datetime.now()
self.event = event
def worker(self):
while True:
if self.event.is_set():
rand_sleep_time = random.randint(1, 10) / 5
logging.debug("Sleeping for %.2f secs" % rand_sleep_time)
time.sleep(rand_sleep_time)
logging.debug("Werking")
else:
time.sleep(1)
def blocker(self):
while True:
rand_sleep_time = random.randint(3, 6)
logging.debug("Sleeping for %.2f secs" % rand_sleep_time)
time.sleep(rand_sleep_time)
if datetime.now() > self.start_time + timedelta(seconds=10):
self.event.clear() # only stop the execution for when the ip is updated
logging.debug("ALL THREADS SLEEP NOW!")
time.sleep(10)
self.event.set() # you can now proceed with the computations
self.start_time = datetime.now()
start_time = datetime.now()
e = threading.Event()
e.set()
c = Controller(e)
for thread in range(NUM_THREADS):
t = threading.Thread(target=c.worker, name='Thread-Worker-{}'.format(thread+1))
t.start()
threading.Thread(target=c.blocker, name='Thread-Blocker-1').start()
So the workers above do some work, then the blocker halts all of them for a brief moment of time, while it updates the "ip", and then the workers start the work again. Taking this logic and implementing it into production, this fails (because I assume the workers do not stop). Unfortunately, I cannot include all of the code, but here is the main part. Hopefully, this is enough, as other parts are not related to the fact that the Ip-Updater does not stop other threads. The only difference in this implementation is that I have used classes, perhaps that should be changed (because the methods have self arg and I'm chaning it. But if the Ip-Updater were to successfully stop the other threads, then there should be no problem, no?):
class ThreadedNewsParser(object):
"""
This little guy parses the news with multiple threads and dynamically changes the ip of the sessions
"""
def __init__(self, update_ip_in, num_threads, date_start, date_end):
assert isinstance(num_threads, int)
assert num_threads > 0
assert any(isinstance(date_start, type_) for type_ in [datetime, date])
assert any(isinstance(date_end, type_) for type_ in [datetime, date])
self.start_time = datetime.now()
self.event = threading.Event()
self.event.set()
self.update_ip_in = update_ip_in
self.check_ip_url = 'https://httpbin.org/ip'
autolog("STARTING WORK ON IP: {}".format(session.get(self.check_ip_url).text), logging.debug)
self.num_threads = num_threads
self.date_start = date_start
self.date_end = date_end
self.dates = [date for date in date_range(date_start, date_end)]
self.p = DailyCompanyNewsParser(2008, 1, 1) # the date here does not matter
def worker(self):
while len(self.dates) > 0:
if self.event.is_set():
print("THREAD WERKING!")
pause = random.randint(1, 5) / 5
autolog('THREAD SLEEPING %.2f' % pause, logging.debug)
time.sleep(pause)
if len(self.dates) > 0:
date = self.dates.pop(0)
self.p.get_news_for_all_stocks(verbose=True, date_=date)
else:
print("THREAD SLEEPING")
time.sleep(10) # so that the threads do not check if the event is set instantaneously
def ip_updater(self): # this is the blocker
while len(self.dates) > 0:
autolog("IP_UPDATER SLEEPING FOR: {}".format(self.update_ip_in / 4), logging.debug)
time.sleep(self.update_ip_in / 4) # do not check the condition every instance
if datetime.now() > self.start_time + timedelta(seconds=self.update_ip_in):
print("ALL THREADS SLEEP NOW!")
autolog("ALL THREADS SLEEP NOW!", logging.info)
self.event.clear() # Make all other threads sleep so that we can update the IP
time.sleep(10)
get_new_ip()
self.start_time = datetime().now()
# autolog("Obtained new IP address: {}".format(session.get(self.check_ip_url).text), logging.debug)
autolog("ALL THREADS WAKE UP NOW!", logging.info)
print("ALL THREADS WAKE UP NOW!")
self.event.set()
def run(self):
for thread in range(self.num_threads):
t = threading.Thread(target=self.worker, name='Thread-Worker-{}'.format(thread+1))
t.start()
threading.Thread(target=self.ip_updater, name='Thread-IPUpdater-1').start()
Rewriting everything so that event and start_time are global variables does not solve the issue.. For example:
class ThreadedNewsParser(object):
"""
This little guy parses the news with multiple threads and dynamically changes the ip of the sessions
"""
def __init__(self, update_ip_in, num_threads, date_start, date_end):
assert isinstance(num_threads, int)
assert num_threads > 0
assert any(isinstance(date_start, type_) for type_ in [datetime, date])
assert any(isinstance(date_end, type_) for type_ in [datetime, date])
self.update_ip_in = update_ip_in
self.check_ip_url = 'https://httpbin.org/ip'
autolog("STARTING WORK ON IP: {}".format(session.get(self.check_ip_url).text), logging.debug)
self.num_threads = num_threads
self.date_start = date_start
self.date_end = date_end
self.dates = [date for date in date_range(date_start, date_end)]
self.p = DailyCompanyNewsParser(2008, 1, 1) # the date here does not matter
def worker(self):
global event
while len(self.dates) > 0:
if event.is_set():
print("THREAD WERKING!")
pause = random.randint(1, 5) / 5
autolog('THREAD SLEEPING %.2f' % pause, logging.debug)
time.sleep(pause)
if len(self.dates) > 0:
date = self.dates.pop(0)
self.p.get_news_for_all_stocks(verbose=True, date_=date)
else:
print("THREAD SLEEPING")
time.sleep(10) # so that the threads do not check if the event is set instantaneously
def ip_updater(self): # this is the blocker
global start_time
global event
while len(self.dates) > 0:
autolog("IP_UPDATER SLEEPING FOR: {}".format(self.update_ip_in / 4), logging.debug)
time.sleep(self.update_ip_in / 4) # do not check the condition every instance
if datetime.now() > start_time + timedelta(seconds=self.update_ip_in):
print("ALL THREADS SLEEP NOW!")
autolog("ALL THREADS SLEEP NOW!", logging.info)
event.clear() # Make all other threads sleep so that we can update the IP
time.sleep(10)
get_new_ip()
start_time = datetime().now()
# autolog("Obtained new IP address: {}".format(session.get(self.check_ip_url).text), logging.debug)
autolog("ALL THREADS WAKE UP NOW!", logging.info)
print("ALL THREADS WAKE UP NOW!")
event.set()
def run(self):
for thread in range(self.num_threads):
t = threading.Thread(target=self.worker, name='Thread-Worker-{}'.format(thread+1))
t.start()
threading.Thread(target=self.ip_updater, name='Thread-IPUpdater-1').start()