I am working on a scraper that rotates IPs. I have created a small MVP in a notebook that works as expected:
import logging
import time
import random
import threading
from datetime import datetime
from datetime import timedelta
logging.basicConfig(
level=logging.DEBUG,
format='(%(threadName)-10s) %(message)s',
)
class Controller(object):
    """Coordinate worker threads with a blocker thread via a shared Event.

    While the event is set, workers do (simulated) work; the blocker
    periodically clears the event to pause every worker for ten seconds,
    then sets it again and restarts its countdown.
    """

    def __init__(self, event):
        self.start_time = datetime.now()  # reference point for the blocker
        self.event = event

    def worker(self):
        """Simulated work loop; only makes progress while the event is set."""
        while True:
            if not self.event.is_set():
                # Paused by the blocker - poll again in a second.
                time.sleep(1)
                continue
            pause = random.randint(1, 10) / 5
            logging.debug("Sleeping for %.2f secs" % pause)
            time.sleep(pause)
            logging.debug("Werking")

    def blocker(self):
        """Every few seconds, check whether ten seconds of work have passed;
        if so, pause all workers for ten seconds while the "ip" is updated."""
        while True:
            nap = random.randint(3, 6)
            logging.debug("Sleeping for %.2f secs" % nap)
            time.sleep(nap)
            if datetime.now() > self.start_time + timedelta(seconds=10):
                self.event.clear()  # only stop the execution for when the ip is updated
                logging.debug("ALL THREADS SLEEP NOW!")
                time.sleep(10)
                self.event.set()  # you can now proceed with the computations
                self.start_time = datetime.now()
# BUG FIX: NUM_THREADS was never defined in the snippet, so this loop raised
# NameError before any thread started. Define it explicitly.
NUM_THREADS = 4

start_time = datetime.now()
e = threading.Event()
e.set()  # workers only run while the event is set
c = Controller(e)
# NOTE: these threads are non-daemon and loop forever, so the process only
# exits when killed - acceptable for a notebook MVP.
for thread in range(NUM_THREADS):
    t = threading.Thread(target=c.worker, name='Thread-Worker-{}'.format(thread + 1))
    t.start()
threading.Thread(target=c.blocker, name='Thread-Blocker-1').start()
So the workers above do some work, then the blocker halts all of them for a brief moment while it updates the "ip", and then the workers start working again. Taking this logic into production, it fails (I assume because the workers do not stop). Unfortunately, I cannot include all of the code, but here is the main part. Hopefully this is enough, as the other parts are not related to the fact that the IP-Updater does not stop the other threads. The only difference in this implementation is that I have used classes; perhaps that should be changed (because the methods have a self argument and I'm changing it). But if the IP-Updater were to successfully stop the other threads, then there should be no problem, no?
class ThreadedNewsParser(object):
    """
    This little guy parses the news with multiple threads and dynamically changes the ip of the sessions
    """

    def __init__(self, update_ip_in, num_threads, date_start, date_end):
        assert isinstance(num_threads, int)
        assert num_threads > 0
        assert any(isinstance(date_start, type_) for type_ in [datetime, date])
        assert any(isinstance(date_end, type_) for type_ in [datetime, date])
        self.start_time = datetime.now()  # moment of the last IP rotation
        self.event = threading.Event()
        self.event.set()  # workers may run from the start
        self.update_ip_in = update_ip_in  # rotate the IP every this many seconds
        self.check_ip_url = 'https://httpbin.org/ip'
        autolog("STARTING WORK ON IP: {}".format(session.get(self.check_ip_url).text), logging.debug)
        self.num_threads = num_threads
        self.date_start = date_start
        self.date_end = date_end
        self.dates = [date for date in date_range(date_start, date_end)]
        self.p = DailyCompanyNewsParser(2008, 1, 1) # the date here does not matter

    def worker(self):
        """Consume dates from the shared list while the event allows it."""
        while len(self.dates) > 0:
            if self.event.is_set():
                print("THREAD WERKING!")
                pause = random.randint(1, 5) / 5
                autolog('THREAD SLEEPING %.2f' % pause, logging.debug)
                time.sleep(pause)
                if len(self.dates) > 0:
                    date = self.dates.pop(0)
                    self.p.get_news_for_all_stocks(verbose=True, date_=date)
            else:
                print("THREAD SLEEPING")
                time.sleep(10) # so that the threads do not check if the event is set instantaneously

    def ip_updater(self): # this is the blocker
        """Periodically pause all workers, rotate the IP, then resume them."""
        while len(self.dates) > 0:
            autolog("IP_UPDATER SLEEPING FOR: {}".format(self.update_ip_in / 4), logging.debug)
            time.sleep(self.update_ip_in / 4) # do not check the condition every instance
            if datetime.now() > self.start_time + timedelta(seconds=self.update_ip_in):
                print("ALL THREADS SLEEP NOW!")
                autolog("ALL THREADS SLEEP NOW!", logging.info)
                self.event.clear() # Make all other threads sleep so that we can update the IP
                time.sleep(10)
                get_new_ip()
                # BUG FIX: this read `datetime().now()`, which raises TypeError
                # (datetime needs year/month/day arguments). The updater thread
                # died right here - *after* clearing the event - so the event
                # was never set again and every worker slept forever. That is
                # the production failure described above.
                self.start_time = datetime.now()
                # autolog("Obtained new IP address: {}".format(session.get(self.check_ip_url).text), logging.debug)
                autolog("ALL THREADS WAKE UP NOW!", logging.info)
                print("ALL THREADS WAKE UP NOW!")
                self.event.set()

    def run(self):
        """Spawn the worker threads and the single IP-updater thread."""
        for thread in range(self.num_threads):
            t = threading.Thread(target=self.worker, name='Thread-Worker-{}'.format(thread+1))
            t.start()
        threading.Thread(target=self.ip_updater, name='Thread-IPUpdater-1').start()
Rewriting everything so that event and start_time are global variables does not solve the issue. For example:
class ThreadedNewsParser(object):
    """
    This little guy parses the news with multiple threads and dynamically changes the ip of the sessions
    """

    def __init__(self, update_ip_in, num_threads, date_start, date_end):
        assert isinstance(num_threads, int)
        assert num_threads > 0
        assert any(isinstance(date_start, type_) for type_ in [datetime, date])
        assert any(isinstance(date_end, type_) for type_ in [datetime, date])
        self.update_ip_in = update_ip_in  # rotate the IP every this many seconds
        self.check_ip_url = 'https://httpbin.org/ip'
        autolog("STARTING WORK ON IP: {}".format(session.get(self.check_ip_url).text), logging.debug)
        self.num_threads = num_threads
        self.date_start = date_start
        self.date_end = date_end
        self.dates = [date for date in date_range(date_start, date_end)]
        self.p = DailyCompanyNewsParser(2008, 1, 1) # the date here does not matter

    def worker(self):
        """Consume dates from the shared list while the global event allows it."""
        global event
        while len(self.dates) > 0:
            if event.is_set():
                print("THREAD WERKING!")
                pause = random.randint(1, 5) / 5
                autolog('THREAD SLEEPING %.2f' % pause, logging.debug)
                time.sleep(pause)
                if len(self.dates) > 0:
                    date = self.dates.pop(0)
                    self.p.get_news_for_all_stocks(verbose=True, date_=date)
            else:
                print("THREAD SLEEPING")
                time.sleep(10) # so that the threads do not check if the event is set instantaneously

    def ip_updater(self): # this is the blocker
        """Periodically pause all workers, rotate the IP, then resume them."""
        global start_time
        global event
        while len(self.dates) > 0:
            autolog("IP_UPDATER SLEEPING FOR: {}".format(self.update_ip_in / 4), logging.debug)
            time.sleep(self.update_ip_in / 4) # do not check the condition every instance
            if datetime.now() > start_time + timedelta(seconds=self.update_ip_in):
                print("ALL THREADS SLEEP NOW!")
                autolog("ALL THREADS SLEEP NOW!", logging.info)
                event.clear() # Make all other threads sleep so that we can update the IP
                time.sleep(10)
                get_new_ip()
                # BUG FIX: was `datetime().now()`, which raises TypeError and
                # killed this thread *after* it cleared the event, leaving the
                # workers asleep forever. Moving to globals never fixed this
                # because the crash is on this line, not in the sharing model.
                start_time = datetime.now()
                # autolog("Obtained new IP address: {}".format(session.get(self.check_ip_url).text), logging.debug)
                autolog("ALL THREADS WAKE UP NOW!", logging.info)
                print("ALL THREADS WAKE UP NOW!")
                event.set()

    def run(self):
        """Spawn the worker threads and the single IP-updater thread."""
        for thread in range(self.num_threads):
            t = threading.Thread(target=self.worker, name='Thread-Worker-{}'.format(thread+1))
            t.start()
        threading.Thread(target=self.ip_updater, name='Thread-IPUpdater-1').start()
Related
I'm creating a script to monitor my mouse/keyboard activity. The intent is to update a timestamp whenever I move the mouse or press a button on the keyboard. I've threaded the mouse and keyboard check methods, and the main thread checks whether we have passed the timeout/inactive duration, clicking at a specified location if we have.
Unfortunately, the keyboard monitoring is not working. The pynput.keyboard.Listener object seems to never join().
I'm not particularly comfortable with multithreading but I think I need it for this script. Please share a better way if there is one. I want to be able to run this script/class as a thread in another script later.
from pynput.keyboard import Listener
import pyautogui as gui
import threading, time
from datetime import datetime, timedelta
class activity(threading.Thread):
    """Monitor mouse/keyboard activity; after `timeout` seconds without any
    input, click a fixed screen location (and restore the cursor)."""

    def __init__(self, timeout: int = 60):
        # BUG FIX: threading.Thread.__init__ was never called, so calling
        # .start() on an instance raised RuntimeError. Initialize the base.
        super().__init__()
        self.stop_flag = False                      # set by stop() to end all loops
        self.timeout = timedelta(seconds=timeout)   # inactivity threshold
        self.last_timestamp = datetime.now()        # moment of last observed input

    def update_timestamp(self):
        """Record 'now' as the moment of last activity."""
        self.last_timestamp = datetime.now()
        print('timestamp updated')

    # For monitoring if the keyboard is active
    def keybd_monitoring(self, lock: threading.Lock) -> None:
        # BUG FIX: the original joined a Listener whose callback always
        # returned True, so listener.join() never returned and the
        # timestamp-updating code after it was unreachable. Do the update
        # inside the callback, and return False (which stops the listener)
        # once stop() has been requested.
        def on_release(key):
            with lock:
                self.update_timestamp()
            print('Keyboard pressed')
            return not self.stop_flag

        with Listener(on_release=on_release) as listener:
            listener.join()

    # For monitoring if the mouse is active
    def mouse_monitoring(self, lock: threading.Lock) -> None:
        last_position = gui.position()
        while not self.stop_flag:
            time.sleep(3)
            curr_position = gui.position()
            if last_position != curr_position:
                last_position = curr_position
                with lock:
                    self.update_timestamp()
                print('Mouse Moved')

    def stop(self):
        """Ask every monitoring loop to finish."""
        self.stop_flag = True

    # For monitoring if the mouse/keyboard have been used in the last TIMEOUT seconds
    def run(self):
        try:
            width, height = gui.size()
            lock = threading.Lock()
            mouse = threading.Thread(target=self.mouse_monitoring, args=(lock,))
            keybd = threading.Thread(target=self.keybd_monitoring, args=(lock,))
            mouse.start()
            keybd.start()
            while not self.stop_flag:
                time.sleep(.1)
                if datetime.now() > self.last_timestamp + self.timeout:
                    curr_position = gui.position()
                    gui.click(int(width * .6), height)
                    gui.moveTo(curr_position)
                    # Reset the clock so we do not click every 0.1s once idle
                    # (matches the module-level version of this script).
                    self.update_timestamp()
        finally:
            self.stop()
            if mouse.is_alive():
                mouse.join()
            if keybd.is_alive():
                keybd.join()
# Run the monitor in the foreground when executed as a script.
if __name__ == '__main__':
    activity().run()
I've made it work without the monitoring functions being in a class. I'm still curious if it could work within a class.
from pynput.keyboard import Listener
import pyautogui as gui
import threading, time
from datetime import datetime, timedelta
stop_flag = False
timeout = timedelta(seconds=60)
last_timestamp = datetime.now()
lock = threading.Lock()
def update_timestamp(key=None):
    """Record 'now' as the moment of last activity (thread-safe).

    Doubles as the pynput on_release callback: it accepts the released key
    and returns the stop flag (False keeps the listener alive).
    """
    global last_timestamp
    with lock:
        last_timestamp = datetime.now()
    return stop_flag
# For monitoring if the keyboard is active
def keybd_monitoring(lock: threading.Lock) -> None:
    """Block inside a pynput Listener; update_timestamp runs per key release
    and stops the listener once it returns the (truthy) stop flag."""
    with Listener(on_release=update_timestamp) as kb_listener:
        kb_listener.join()
# For monitoring if the mouse is active
def mouse_monitoring(lock: threading.Lock) -> None:
    """Poll the cursor every 3 seconds; any movement refreshes the timestamp."""
    prev_position = gui.position()
    while not stop_flag:
        time.sleep(3)
        position = gui.position()
        if position != prev_position:
            prev_position = position
            update_timestamp()
def stop():
    """Signal the monitoring loops (and the main loop) to exit."""
    global stop_flag
    stop_flag = True
# For monitoring if the mouse/keyboard have been used in the last TIMEOUT seconds
def activity():
    """Spawn the two monitors, then click near the bottom of the screen
    whenever no input has been observed for `timeout`."""
    try:
        width, height = gui.size()
        mouse_thread = threading.Thread(target=mouse_monitoring, args=(lock,))
        keybd_thread = threading.Thread(target=keybd_monitoring, args=(lock,))
        mouse_thread.start()
        keybd_thread.start()
        while not stop_flag:
            time.sleep(1)
            if datetime.now() > last_timestamp + timeout:
                curr_position = gui.position()
                gui.click(int(width*.6), height)
                gui.moveTo(curr_position)
                update_timestamp()
    finally:
        stop()
        if mouse_thread.is_alive():
            mouse_thread.join()
        if keybd_thread.is_alive():
            keybd_thread.join()
# Run the idle-clicker in the foreground when executed as a script.
if __name__ == '__main__':
    activity()
I want to create a timer in python with the following functions:
timer.start() - should start the timer
timer.pause() - should pause the timer
timer.resume() - should resume the timer
timer.get() - should return the current time
The timer should run from 0 upwards. It is meant to measure time, not trigger a callback function.
So if you start it, it should start counting the seconds like 0 1 2 3, if you pause it, it should be stilll at 3, but not going further. After its resumed it then goes on with 4 5 6 and so on
How can I do this?
Pause/Resume functions for timer is not a duplicate because I do not care about callbacks.
# mytimer.py
from datetime import datetime
import time
class MyTimer():
    """A pausable stopwatch counting up from zero.

    timer.start() - should start the timer
    timer.pause() - should pause the timer
    timer.resume() - should resume the timer
    timer.get() - should return the current time
    """

    def __init__(self):
        print('Initializing timer')
        self.timestarted = None   # None until start() is called
        self.timepaused = None    # moment pause() was last called
        self.paused = False       # True while paused

    def start(self):
        """ Starts an internal timer by recording the current time """
        print("Starting timer")
        self.timestarted = datetime.now()

    def pause(self):
        """ Pauses the timer """
        if self.timestarted is None:
            raise ValueError("Timer not started")
        if self.paused:
            raise ValueError("Timer is already paused")
        print('Pausing timer')
        self.timepaused = datetime.now()
        self.paused = True

    def resume(self):
        """ Resumes the timer by adding the pause time to the start time """
        if self.timestarted is None:
            raise ValueError("Timer not started")
        if not self.paused:
            raise ValueError("Timer is not paused")
        print('Resuming timer')
        # Shifting the start forward by the paused span makes the pause
        # invisible to get().
        self.timestarted += datetime.now() - self.timepaused
        self.paused = False

    def get(self):
        """ Returns a timedelta object showing the amount of time
        elapsed since the start time, less any pauses """
        print('Get timer value')
        if self.timestarted is None:
            raise ValueError("Timer not started")
        reference = self.timepaused if self.paused else datetime.now()
        return reference - self.timestarted
if __name__ == "__main__":
t = MyTimer()
t.start()
print('Waiting 2 seconds'); time.sleep(2)
print(t.get())
print('Waiting 1 second'); time.sleep(1)
t.pause()
print('Waiting 2 seconds [paused]'); time.sleep(2)
print(t.get())
print('Waiting 1 second [paused]'); time.sleep(1)
print(t.get())
print('Waiting 1 second [paused]'); time.sleep(1)
t.resume()
print('Waiting 1 second'); time.sleep(1)
print(t.get())
Run
python mytimer.py
Output
Initializing timer
Starting timer
Waiting 2 seconds
Get timer value
0:00:02.001523
Waiting 1 second
Pausing timer
Waiting 2 seconds [paused]
Get timer value
0:00:03.004724
Waiting 1 second [paused]
Get timer value
0:00:03.004724
Waiting 1 second [paused]
Resuming timer
Waiting 1 second
Get timer value
0:00:04.008578
I am using threads to check a header status code from an API URL. How can I break the loop and stop all other threads if a condition is true? Please check the following code:
import logging, time, threading, requests
#: Log items
logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s', level=logging.INFO)
class EppThread(threading.Thread):
    """Repeatedly GETs args[0] up to args[1] times; stops on a non-200 reply."""

    def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, verbose=None):
        threading.Thread.__init__(self, group=group, target=target, name=name, verbose=verbose)
        self.args = args  # (url, request_limit)

    def run(self):
        url = self.args[0]
        limit = self.args[1]
        for i in range(limit):
            response = requests.get(url)
            if response.status_code != 200:
                break
                # Exit other similar threads (with same url)
            else:
                # BUG FIX: `thread` was an undefined name here (NameError);
                # a thread refers to itself via `self`.
                print('Thread {0} - success'.format(self.name))
        print('process completed')
        # Send Email
number_of_threads = 5
number_of_requests = 100
urls = ['https://v1.api.com/example', 'https://v2.api.com/example']

if __name__ == '__main__':
    import os  # BUG FIX: os._exit below was called without importing os

    startTime = time.time()
    # BUG FIX: `threads = []` was inside the url loop, so the watchdog below
    # only ever saw the last url's threads. Accumulate them all instead.
    threads = []
    for url_index, url in enumerate(urls):
        for i in range(number_of_threads):
            # BUG FIX: `name` was an undefined variable; derive the thread
            # name prefix from the url index instead.
            et = EppThread(name="{0}-Thread-{1}".format(url_index, i + 1),
                           args=(url, number_of_requests))
            threads.append(et)
            et.start()
    # Check if execution time is not greater than 1 minute
    while len(threads) > 0 and (time.time() - startTime) < 60:
        time.sleep(0.5)
        # BUG FIX: iterate over a copy - removing from a list while
        # iterating it skips elements.
        for thread in list(threads):
            if not thread.isAlive():
                threads.remove(thread)
                print('Thread {0} terminated'.format(thread.getName()))
    os._exit(1)
Please suggest some better ways that stops code execution if condition gets true in any running thread.
Thanks for your help.
An important thing to note here is that when the run method of a Thread is complete, the Thread is set to dead and garbage collected. So all we really need is a boolean class variable that breaks that loop. Class variables are the same for all objects instantiated from that class and subclasses; so once we set it, all of the objects in our class will act the same way:
import logging, time, threading, requests
#: Log items
logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s', level=logging.INFO)
class EppThread(threading.Thread):
    """Thread that polls `url`; a non-200 response flags *every* thread of
    this class (via the shared `kill` class variable) to stop."""

    kill = False  # class-level kill switch read by all instances
    url = 'https://v1.api.com/example'  # subclass and override to poll another url

    def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, verbose=None):
        threading.Thread.__init__(self, group=group, target=target, name=name, verbose=verbose)
        self.args = args  # (request_limit,)

    def run(self):
        limit = self.args[0]
        for i in range(limit):
            response = requests.get(self.url)
            if response.status_code != 200:
                # BUG FIX: `self.kill = True` created an *instance* attribute
                # that shadowed the class variable, so the other threads never
                # saw it and kept running. Assign on the class itself so every
                # instance (of this class or a subclass) reads True.
                type(self).kill = True
            else:
                print('Thread {0} - success'.format(self.getName()))
            if self.kill: # if kill is True, break the loop, send the email, and finish the Thread
                break
        print('process completed')
        # Send Email
number_of_threads = 5
number_of_requests = 100

if __name__ == '__main__':
    startTime = time.time()
    threads = []
    for i in range(number_of_threads):
        # BUG FIXES: `name` was an undefined variable, and
        # `args=(number_of_requests)` is just a parenthesized int - a
        # one-element tuple needs a trailing comma, otherwise
        # `self.args[0]` raises TypeError inside run().
        et = EppThread(name="EppThread-{0}".format(i + 1),
                       args=(number_of_requests,))
        threads.append(et)
        et.start()
    # Check if execution time is not greater than 1 minute
    while threads and time.time() - startTime < 60:  # removed len() due to implicit Falsiness of empty containers in Python
        time.sleep(0.5)
        # BUG FIX: iterate over a copy - removing from a list while
        # iterating it skips elements.
        for thread in list(threads):
            if not thread.isAlive():
                threads.remove(thread)
                print('Thread {0} terminated'.format(thread.getName()))
    EppThread.kill = True
Now when any of the EppThreads has a bad connection it sets the class variable to True, which makes all of the other EppThreads break the loop as well. I also added EppThread.kill = True at the end so it'll break the request loops more cleanly if you exceed 1 minute run time.
Lastly, I added the url class variable. This is because you expressed interest in running different urls simultaneously and only kill the ones that specifically have a bad connection. All you have to do at this point is subclass EppThread and overwrite kill and url.
# One subclass per url: redeclaring `kill` gives this url-group its own
# independent kill switch, and `url` selects the endpoint it polls.
class EppThread2(EppThread):
    kill = False
    url = 'https://v2.example.com/api?$awesome=True'
Then you can instantiate EppThread2 and add it to the threads list and everything should work as you want it to.
You could create an event object that's shared between all your threads that share the same url. When you run into an error in the thread, set the event. Then, in your run loop check for the event. If it has happend, kill the thread by breaking the loop.
Here's a version of your example modified to use the Event.
import logging, time, threading, requests
#: Log items
logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s', level=logging.INFO)
class EppThread(threading.Thread):
    """Thread that polls a url; `bad_status` (an Event shared by the threads
    of one url-group) lets any thread stop all the others."""

    def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, verbose=None, bad_status=None):
        threading.Thread.__init__(self, group=group, target=target, name=name, verbose=verbose)
        self.args = args  # (url, request_limit)
        self.bad_status = bad_status  # threading.Event shared within a url-group

    def run(self):
        url = self.args[0]
        limit = self.args[1]
        for i in range(limit):
            if self.bad_status.is_set():
                # break the loop on errors in any thread.
                break
            response = requests.get(url)
            if response.status_code != 200:
                # Set the event when an error occurs
                self.bad_status.set()
                break
                # Exit other similar threads (with same url)
            else:
                # BUG FIX: `thread` was an undefined name here (NameError);
                # a thread refers to itself via `self`.
                print('Thread {0} - success'.format(self.name))
        print('process completed')
        # Send Email
number_of_threads = 5
number_of_requests = 100
urls = ['https://v1.api.com/example', 'https://v2.api.com/example']

if __name__ == '__main__':
    import os  # BUG FIX: os._exit below was called without importing os

    startTime = time.time()
    threads = []
    for url_index, url in enumerate(urls):
        # Create an event for each URL
        bad_status = threading.Event()
        for i in range(number_of_threads):
            # BUG FIX: `name` was an undefined variable; derive the thread
            # name prefix from the url index instead.
            et = EppThread(name="{0}-Thread-{1}".format(url_index, i + 1),
                           args=(url, number_of_requests), bad_status=bad_status)
            threads.append(et)
            et.start()
    # Check if execution time is not greater than 1 minute
    while len(threads) > 0 and (time.time() - startTime) < 60:
        time.sleep(0.5)
        # BUG FIX: iterate over a copy - removing from a list while
        # iterating it skips elements.
        for thread in list(threads):
            if not thread.isAlive():
                threads.remove(thread)
                print('Thread {0} terminated'.format(thread.getName()))
    os._exit(1)
The threading.Event class works for both threads and processes. So, if at somepoint you wanted to switch to using Process it would "just work".
Use `sys.exit` from the `sys` module.
Here is an example:
import sys

# Named `items` rather than `list`, which would shadow the builtin list type.
items = []
if not items:  # idiomatic emptiness test instead of len(...) < 1
    sys.exit("You don't have any items in your list")
I have multiple threads that run a while loop. I would like to terminate these threads after a given amount of time. I am aware of other questions similar to this but I don't see how I can transfer those answers to my code.
def function1(arg1, arg2, arg3, duration):
    """Do work repeatedly until `duration` seconds have elapsed, then return.

    arg1/arg2/arg3 are placeholders for the real work's inputs.
    """
    t_end = time.time() + duration
    while time.time() < t_end:
        # do some stuff
        # BUG FIX: the loop body held only a comment, which is a syntax
        # error in Python; a statement (the real work) must go here.
        pass
# Spawn 100 worker threads, each running function1 for ~10 seconds.
# NOTE(review): arg1/arg2/arg3 must be defined before this loop runs, and
# the threads are never join()ed here - confirm that is intended.
for i in range(100):
    t = Thread(target = function1, args=(arg1, arg2, arg3, 10))
    t.start()
This opens 100 threads but they never close. How can I close these threads after the specified time, in this example 10 seconds? My function opens a socket.
You could pass a callback to each thread. And create a thread list.
# Registry of started threads: id -> {"starttime": float, "thread": Thread}
threadlist = {}

def cb(id, currtime):
    """Return True when the thread registered under `id` has been running
    for more than 10 seconds (i.e. it should stop)."""
    entry = threadlist[id]
    # BUG FIX: registry entries are dicts ({"starttime": ..., "thread": ...}),
    # so the start time must be read with a key lookup - `entry.starttime`
    # raised AttributeError.
    elapsed = currtime - entry["starttime"]
    return elapsed > 10
def function1(arg1, arg2, arg3, duration, cb, threadid):
    """Work loop that ends after `duration` seconds, or as soon as the
    `cb(threadid, now)` callback reports that this thread should stop."""
    deadline = time.time() + duration
    while time.time() < deadline:
        #do some stuff
        if cb(threadid, time.time()):
            break
# Spawn the workers and register each one so cb() can look up its start time.
for i in range(100):
    t = Thread(target=function1, args=(arg1, arg2, arg3, 10, cb, i))
    # BUG FIX: the original wrote `threadlist[id] = ...`, using the *builtin*
    # function `id` as the key - every iteration overwrote one entry, and
    # cb(threadid, ...) then failed its lookups. Key by the loop index.
    threadlist[i] = {"starttime": time.time(), "thread": t}
    t.start()
And to check:
time.sleep(15)
for item in threadlist.values():
    # BUG FIX: registry values are dicts, so the thread must be fetched with
    # a key lookup - `item.thread` raised AttributeError.
    print(item["thread"].is_alive())
Use a mixture of terminating a thread (info found here: Is there any way to kill a Thread in Python?)
and threading timer objects: https://docs.python.org/2/library/threading.html#timer-objects
The code below works for me, but the fact it keeps throwing a TypeError has got me puzzled. I can't seem to find much information on why it's happening or how to prevent it:
threadingtest.py
#!/usr/bin/env python3
import time
import threading
class StoppableThread(threading.Thread):
    """Thread class with a stop() method. The thread itself has to check
    regularly for the stopped() condition."""

    def __init__(self):
        super(StoppableThread, self).__init__()
        # BUG FIX: the attribute used to be named `_stop`, which shadows the
        # private Thread._stop() *method*. When a thread finishes, threading
        # internals call self._stop() - and an Event is not callable, which
        # is exactly the puzzling TypeError the original kept swallowing.
        # A distinct name removes the error at its source.
        self._stop_event = threading.Event()

    def stop(self):
        """Ask the thread to finish and wait for it."""
        self._stop_event.set()
        self.join()  # no TypeError work-around needed once the shadowing is gone

    def stopped(self):
        """True once stop() has been requested."""
        return self._stop_event.isSet()
class MyStoppableThread(StoppableThread):
    """Concrete stoppable worker: stashes its constructor arguments and
    idles in one-second ticks until asked to stop."""

    def __init__(self, *args):
        super(MyStoppableThread, self).__init__()
        self.args = args  # Use these in the thread

    def run(self):
        print("Started my thread with arguments {}".format(self.args))
        # THIS IS WHERE YOU DO THINGS
        while True:
            if self.stopped():
                return
            time.sleep(1)
if __name__ == "__main__":
threads = []
for i in range(100):
t = MyStoppableThread(i, 'a', 'b', 'c')
t.start()
threads.append(t)
print("\n:: all threads created\n")
time.sleep(5)
print("\n:: killing all threads\n");
for t in threads:
t.stop()
How do I speed up this test code in Python talking to Redis on WinXP using Python 2.7?
Would multiprocessing be better? The load rate is 6,000/s vs. a publish rate of 100,000/s.
I chose 100,000, but could lower in testing. The process takes 15 seconds.
Would changing setting on server help???
import time
from time import strftime
import redis
import threading, Queue
start_time = time.time()  # wall-clock start for the duration report below
# Single connection shared by all worker threads.
# NOTE(review): assumes the redis-py client is safe to share across threads - confirm.
cxn = redis.StrictRedis('127.0.0.1',6379,1)
class WorkerMain(threading.Thread):
def __init__(self, queue):
threading.Thread.__init__(self)
self.queue = queue
def run(self):
while 1:
try: # take a job from the queue
row = self.queue.get_nowait()
except Queue.Empty: raise SystemExit
try:
cxn.set(row, "Row")
#print (row, "Row")
except: print 'Setup Error'
if __name__ == '__main__':
connections = 5
sml = range(1,100000)
queue = Queue.Queue()
for row in sml:
queue.put(str(row))
threads = []
for dummy in range(connections):
t = WorkerMain(queue)
t.start()
threads.append(t)
# wait for all threads to finish
for thread in threads:
thread.join()
print
end_time = time.time()
duration = end_time - start_time
print "Duration: %s" % duration
I used the code below for multiprocessing and "monitored" the data with the CLI... not all of the data went into the server.
from multiprocessing import Pool
import time
import redis
start_time = time.time()  # wall-clock start for the duration report below
# NOTE(review): this module-level connection is shared with the Pool's worker
# processes via fork/pickling - confirm the redis client supports that.
cxn = redis.Redis('127.0.0.1',6379,1)
def rset(var):
    """SET key `var` to the fixed string "value" via the shared connection."""
    cxn.set(var, "value")
if __name__ =='__main__':
sml = range(1,10000)
#for x in sml:print x
pool = Pool(processes=5)
for row in sml:
pool.apply_async(rset, [(row,)])
#print result.get(),
end_time = time.time()
duration = end_time - start_time
print "Duration: %s" % duration
Here is the pipelined code...... I just commented out the threading stuff.
from time import strftime
import redis
import threading, Queue
# BUG FIX: only `strftime` was imported from the time module above, so the
# time.time() call below raised NameError; import the module itself.
import time

start_time = time.time()  # wall-clock start for the duration report below
cxn = redis.StrictRedis('127.0.0.1',6379,0)
pipe = cxn.pipeline(transaction=False)  # batching only, no MULTI/EXEC wrapper
class WorkerMain(threading.Thread):
def __init__(self, queue):
threading.Thread.__init__(self)
self.queue = queue
def run(self):
while 1:
try: # take a job from the queue
row = self.queue.get_nowait()
except Queue.Empty: raise SystemExit
try:
cxn.set(row, "Row")
#print (row, "ROw")
except: print 'Setup Error'
if __name__ == '__main__':
#connections = 5
sml = range(1,100000)
#queue = Queue.Queue()
for row in sml:
#queue.put(str(row))
pipe.set(str(row),"value").execute()# key, value
# threads = []
# for dummy in range(connections):
# t = WorkerMain(queue)
# t.start()
# threads.append(t)
#
# # wait for all threads to finish
# for thread in threads:
# thread.join()
print
end_time = time.time()
duration = end_time - start_time
print "Duration: %s" % duration
Use Pipelines. A Pipeline batches commands so you don't pay for network overheads.
See :
Section on Pipelines over here https://github.com/andymccurdy/redis-py
Pipelining on Redis.io - http://redis.io/topics/pipelining
Using threading for better performance is not a really good idea if you use cpython (the standard python interpreter) because of the gil.
http://wiki.python.org/moin/GlobalInterpreterLock
multiprocessing should work better