Calculate average sleep for python threads - python

I'm simplifying what I'm trying to achieve as much as possible
I have the following script:
import time, urllib, random
import threading
def get(timeout):
    """Sleep `timeout` seconds, then fetch the test URL once.

    NOTE(review): the `return` sits inside the loop, so `range(2)` only
    ever runs one iteration — confirm whether two requests were intended.
    NOTE(review): `urllib.urlopen` is Python 2; on Python 3 use
    `urllib.request.urlopen`.
    """
    for i in range(2):  # original used Python 2 `xrange`
        time.sleep(timeout)
        return urllib.urlopen('http://localhost:9855').read()


def calculate_timeout(total_threads):
    """Return a per-thread sleep (seconds) keeping the aggregate request
    rate at or below 60 per minute.

    With `total_threads` threads each issuing one request after sleeping
    `timeout` seconds, the average rate is total_threads / timeout
    requests per second; 60/minute = 1/second requires
    timeout >= total_threads.
    """
    return float(total_threads)


if __name__ == '__main__':
    total = random.randint(0, 400)
    for i in range(total):
        # Original had threading.thread (wrong case), called len() on the
        # int `total`, and was missing a closing parenthesis.
        threading.Thread(target=get,
                         kwargs={"timeout": calculate_timeout(total)}).start()
What I need to do is modify the calculate_timeout function in such a way so that whatever random number comes up,
there won't be more than 60 urllib requests per minute on average.

There's a bevy of rate limiting algorithms posted here including examples for threaded code, the simplest 'imo' I've posted below. The code will create a decorator which you can then use to decorate your get method with.
import time
import threading
from functools import wraps
def rate_limited(max_per_second):
    """Decorator factory limiting the wrapped function to at most
    `max_per_second` calls per second across all threads.

    Threads that would exceed the rate sleep (while holding the lock)
    until the minimum interval has elapsed.
    """
    lock = threading.Lock()
    min_interval = 1.0 / float(max_per_second)

    def decorate(func):
        # Mutable cell so the nested function can rebind the timestamp
        # without `nonlocal` (kept from the original).
        last_time_called = [0.0]

        @wraps(func)  # original had the decorator garbled to a comment
        def rate_limited_function(*args, **kwargs):
            # `with` guarantees the lock is released even if sleep is
            # interrupted (original could leak the lock on exception).
            with lock:
                # time.monotonic replaces time.clock, removed in 3.8.
                elapsed = time.monotonic() - last_time_called[0]
                left_to_wait = min_interval - elapsed
                if left_to_wait > 0:
                    time.sleep(left_to_wait)
                # Stamp inside the lock: the original wrote this after the
                # call and outside the lock, a data race between threads.
                last_time_called[0] = time.monotonic()
            # Run the wrapped function outside the lock so a slow call
            # does not block other threads longer than the rate requires.
            return func(*args, **kwargs)

        return rate_limited_function

    return decorate

Related

Measure elapsed time for dependent generators

I want to measure the time that various functions take. The thing is, the functions are generators which are piped together, like this:
import functools
import string
from time import sleep
from timeit import default_timer as timer
lines = (string.ascii_lowercase for _ in range(1000))
class Timer:
    """Collects wall-clock timings of decorated generator methods."""

    # Maps str(instance) -> elapsed seconds for the wrapped generator.
    _results = {}

    @classmethod  # original had the decorator garbled to a comment
    def measure(cls):
        """Return a decorator that times a generator method from first
        call until the generator is exhausted."""
        def decorator(method):
            @functools.wraps(method)
            def wrapper(*args, **kwargs):
                obj = args[0]  # the bound instance (`self` of the method)
                start = timer()
                gen = method(*args, **kwargs)
                # Delegating makes the measured span cover the whole
                # consumption of the generator, not just its creation.
                yield from gen
                end = timer()
                cls._results[str(obj)] = end - start
            return wrapper
        return decorator
class Source:
    """Iterable wrapper around a stream of lines."""

    def __init__(self, lines):
        self._lines = lines

    def __iter__(self):
        # Pass every line through unchanged.
        yield from self._lines
class Log:
    """Wraps an event stream and supports `log | filter` piping."""

    def __init__(self, stream):
        self._stream = stream

    def __next__(self):
        return next(self._stream)

    def __iter__(self):
        yield from self._stream

    def __or__(self, filter):
        # Pipe syntax: `log | SomeFilter()` applies the filter to this log.
        return filter(self)

    @classmethod  # original had the decorator garbled to a comment
    def from_source(cls, source):
        """Alternate constructor: build a Log from any iterable source."""
        return cls(iter(source))
class Filter1:
    """Pipeline stage that yields each event after a small simulated delay."""

    def __call__(self, log):
        # Wrap our generator in a Log so further filters can be piped on.
        return Log(self._generator(log))

    @Timer.measure()  # original had the decorator garbled to a comment
    def _generator(self, log):
        for event in log:
            sleep(0.001)  # simulated per-event work
            yield event
class Filter2:
    """Pipeline stage that passes events through untouched."""

    def __call__(self, log):
        # Wrap our generator in a Log so further filters can be piped on.
        return Log(self._generator(log))

    @Timer.measure()  # original had the decorator garbled to a comment
    def _generator(self, log):
        for event in log:
            yield event
if __name__ == "__main__":
    source = Source(lines)
    # Build the pipeline: source -> Filter2 -> Filter1, then drain it so
    # both generators run to exhaustion and record their timings.
    pipeline = Log.from_source(source) | Filter2() | Filter1()
    list(pipeline)
    print(Timer._results)
Filter1._generator and Filter2._generator are the functions I want to measure. As for the Log class, it has an __or__ operator allowing me to pipe those filters on the data. Notice that the filters are identical, but the Filter1 has some sleeps added (in my real code they both actually do some stuff, different stuff).
The Timer decorator is a standard decorator that uses timeit.default_timer to measure the function's execution time.
The result is:
{'<__main__.Filter2 object at 0x000001D0CB7B62C0>': 15.599821100011468, '<__main__.Filter1 object at 0x000001D0CB7B6500>': 15.599853199906647}
So, the times are pretty much identical. This is the result of the fact that one filter parses the data (here, it only yields it, I just created a small representation of what I'm working on) and yields the line to the next filter to be picked up. This is how it's supposed to work.
The question would be: can I measure the times of execution accurately here? The thing I want to measure is: how much time does each filter take to process all the lines. Because obviously Filter1._generator would take more time, but I cannot see it, because the Timer.measure() waits for the generator to exit.

how to time an entire process from beginning to completion and set up a termination execution time?

I have the following celery chain process:
@app.task(name='bcakground')  # NOTE(review): name looks like a typo for 'background'
def background_task():
    """Kick off a task1 -> task2 chain for each id and report elapsed time.

    NOTE(review): the chains run asynchronously, so (end - now) measures
    only the time to *enqueue* them, not the time for them to complete —
    which is exactly the asker's Q1.
    """
    now = datetime.now()
    ids = [700, 701, 708, 722, 783, 799]
    for id in ids:
        # `taks1` looks like a typo for `task1`; both are project tasks
        # defined elsewhere — confirm against the project.
        my_process = chain(taks1.s(id), task2.s())
        my_process()
    end = datetime.now()
    return ['ENDED IN', (end - now).total_seconds()]
Q1: How can I tell how long it takes for this task to complete from beginning to end? The result I get (ENDED IN) doesn't reflect reality, because the chain runs in parallel and the reported value is only a fraction of a second.
Q2 is there any way to place a termination timeout in the event the entire process of background_task takes longer then 25 minutes?
I think you can use wraps from functools; there is an answer to a similar question here: timeit-versus-timing-decorator. @jonaprieto gives an example of using wraps in the link, which I have reproduced below. This should allow you to achieve what you want.
from functools import wraps
from time import time
def timing(f):
    """Decorator that prints the wrapped function's wall-clock run time
    and returns its result unchanged."""
    @wraps(f)  # original had the decorator garbled to a comment
    def wrap(*args, **kw):
        ts = time()
        result = f(*args, **kw)
        te = time()
        # Python 3 print function; the original used the Py2 statement.
        print('func:%r args:[%r, %r] took: %2.4f sec' %
              (f.__name__, args, kw, te - ts))
        return result
    return wrap
in an example:
@timing  # original had the decorator garbled to a comment
def f(a):
    """Spin `a` times doing trivial work — demo payload for @timing."""
    for _ in range(a):
        i = 0
    return -1
Invoking method f wrapped with @timing:
f(100000000)
prints:
func:'f' args:[(100000000,), {}] took: 14.2240 sec
For this, I use timedelta, it returns the difference between two datetime arguments.
import datetime

start_at = datetime.datetime.now()
# do your thing!
# Subtracting two datetimes already yields a timedelta; the original's
# round-trip through total_seconds()/timedelta(seconds=...) was redundant.
end = datetime.datetime.now() - start_at
With this code, when you print(end) it will return a result like 0:00:00.253998

Timer in Python on Windows

If I have a function called a lot of times in a for loop and this function sometimes is running too much time, how can I use a timer for each call of function(to set and reset the timer each time)?
It looks like:
def theFunction(*args):
#some code (timer is on)
#In this point time is out, break and exit function
#Timer is reseted
for i in range(0,100):
theFunction(*args)
Use the time module like so:
import time
# Snapshot the clock before and after the work to time it.
time_start = time.time()
#Do function stuff
time_stop = time.time()
#Check your time now
# Elapsed wall-clock seconds for the timed segment.
timed_segment = time_stop - time_start
#Repeat if needed
To run this multiple times in a for loop you will need to append times into a list as it runs like so:
import time
def function():
    """Time ten iterations of work and return the per-iteration durations."""
    times_list = []
    for x in range(10):  # original was missing the colon here
        time_start = time.time()
        #Do function stuff
        time_stop = time.time()
        #Check your time now
        timed_segment = time_stop - time_start
        times_list.append(timed_segment)
        #Repeat as many times as needed
    return times_list
If you want to break after a certain amount of time you can use a while loop instead like so:
import time
def function(duration=10):
    """Loop doing (placeholder) work until `duration` seconds elapse,
    then return a list containing the total elapsed time.

    `duration` generalizes the original hard-coded 10-second cutoff
    (default preserved for backward compatibility).
    """
    times_list = []
    time_start = time.time()
    time_end = time.time()
    while time_end - time_start < duration:  # time out after `duration` s
        #Your function does stuff here
        time_end = time.time()
    #Next, append times to a list if needed
    # Original appended the *negated* difference to a misspelled name
    # (`time_list`), which raised NameError.
    times_list.append(time_end - time_start)
    return times_list
To stop the function after a certain time regardless of where it is, we can use threading like so:
import threading
from time import sleep
def do_stuff():
    """Pretend to work for ten seconds, then print a message."""
    sleep(10)
    print("1 + 2")

t = threading.Thread(target=do_stuff)
t.start()
# Wait at most five seconds for the worker before moving on.
t.join(timeout=5)
In the above example, passing a timeout to join makes the main thread stop waiting after 5 seconds (note: this does not actually kill the worker thread, which continues running in the background). We can also put this into a decorator if we plan on reusing it many times, like so:
import threading
from time import sleep
def timeout(func):
    """Decorator: run `func` in a worker thread, waiting at most 5 seconds.

    NOTE(review): join(timeout=5) only stops *waiting*; the thread is not
    killed and keeps running in the background if it overruns. The
    wrapped call's return value is not propagated (same as the original).
    """
    def inner_func(*nums, **kwargs):
        # Forward keyword arguments too — the original accepted **kwargs
        # but silently dropped them; `args=(*nums,)` was just `nums`.
        t = threading.Thread(target=func, args=nums, kwargs=kwargs)
        t.start()
        t.join(timeout=5)
    return inner_func
@timeout  # original had the decorator garbled to a comment
def do_stuff(a, b):
    """Demo payload: sleep 3 s, then print the sum (finishes within the
    decorator's 5-second join window)."""
    sleep(3)
    print(a + b)

do_stuff(1, 3)
There is another module called timeit which can measure the execution time of small code snippets. I believe you can use that also. I have never used that module but it should work.
Here is the link to the doc page. Give it a look :: https://docs.python.org/2/library/timeit.html
see How to use timeit module as well
For high re-usability and ease of implementations, I would recommend -
Using decorators -
from time import time
def time_it(func):
    """Decorator that prints elapsed seconds and returns func's result.

    Fixes from the original: it printed `a - time()` (a negative
    duration) and discarded the wrapped function's return value.
    """
    def wrapper(*args, **kwargs):
        start = time()
        result = func(*args, **kwargs)
        print(time() - start)
        return result
    return wrapper

@time_it  # original had the decorator garbled to a comment
def foo(s='this works'):
    print(s)

foo()
Using profile.run - https://docs.python.org/2/library/profile.html#module-profile

How to use the resource module to measure the running time of a function?

I want to measure the CPU running time and wall clock running time of functions using Python code.
The resource module was suggested here: How to measure CPU running time and wall clock running time of a function, separately, as Python code (not from terminal)?
Here is the module documentation: http://docs.python.org/2/library/resource.html
The problem is that:
1) I can't figure out how to use it to measure the running time of a function.
2) I don't know how to extract that information from the object returned.
How do I do this?
Just call getrusage before and after executing the function, subtract the fields you care about, and you're done. Since resource doesn't do wall time, you'll need to use a separate function for that.
You can wrap that up in a helper function, or even a decorator, like this:
import datetime
import functools
import resource
import sys
def timed(func):
    """Decorator: print user CPU, system CPU, and wall time of each call
    to stderr, then return the wrapped function's result."""
    @functools.wraps(func)  # original had the decorator garbled to a comment
    def wrapper(*args, **kwargs):
        # Snapshot CPU usage and wall clock before and after the call;
        # resource only reports CPU time, hence the separate datetime.
        r0 = resource.getrusage(resource.RUSAGE_SELF)
        t0 = datetime.datetime.now()
        retval = func(*args, **kwargs)
        r = resource.getrusage(resource.RUSAGE_SELF)
        t = datetime.datetime.now()
        sys.stderr.write('{}: utime {} stime {} wall: {}\n'.format(
            func.__name__,
            datetime.timedelta(seconds=r.ru_utime - r0.ru_utime),
            datetime.timedelta(seconds=r.ru_stime - r0.ru_stime),
            t - t0))
        return retval
    return wrapper
@timed  # original had the decorator garbled to a comment
def myfunc(i):
    """Burn CPU with a long empty loop, then return i*2 (timing demo)."""
    for _ in range(100000000):
        pass
    return i * 2

print(myfunc(2))
This will print out something like:
myfunc: utime 0:00:03.261688 stime 0:00:00.805324 wall 0:00:04.067109
4
If you want more than a couple fields, you probably want to subtract all of the members of the rusage results, but since these are all int or float, that's easy:
# Fragment: assumes `r0` and `r` are rusage snapshots and `func` is the
# wrapped function, as inside the `timed` wrapper above.
rdiff = resource.struct_rusage(f1 - f0 for f0, f1 in zip(r0, r))
sys.stderr.write('{}: utime {} maxrss {} nsignals {} etc.\n'.format(
    func.__name__,  # original format had one more {} than arguments (IndexError)
    datetime.timedelta(seconds=rdiff.ru_utime),  # field is ru_utime, not r_utime
    rdiff.ru_maxrss,
    rdiff.ru_nsignals))

How do you have python scripts display how much time it takes to execute each process?

It was something like cMessage I think? I can't remember, could someone help me?
cProfile ?
To time a function, you can also use a decorator like this one:
from functools import wraps
import time
def timed(f):
    """Time a function, printing milliseconds elapsed per call, and
    return the wrapped function's result."""
    @wraps(f)  # original had the decorator garbled to a comment
    def wrapper(*args, **kwds):
        # perf_counter replaces time.clock, removed in Python 3.8.
        start = time.perf_counter()
        result = f(*args, **kwds)  # original dropped **kwds here
        elapsed_ms = 1000 * (time.perf_counter() - start)
        # f.func_name and the bare print statement were Python 2 only.
        print('%s: %.3f ms' % (f.__name__, elapsed_ms))
        return result
    return wrapper
And "mark" your function with "@timed" like this:
@timed  # original had the decorator garbled to a comment
def toBeTimed():
    """Empty demo function to show the @timed decorator in use."""
    pass

Categories