I'm learning multithreading and multiprocessing and experimenting with them to understand these concepts better. I noticed that when I run a subclassed (extended) Process, it is much slower than all the other approaches. Why could that be?
`
from time import time
from multiprocessing import Process


def count(n):
    """Busy-loop that decrements a local counter from ``n`` down to zero."""
    while n > 0:
        n -= 1


class Proc(Process):
    """Process subclass whose ``run`` counts ``self.num`` down to zero.

    The counter is kept on the instance, so every loop iteration reads and
    writes the ``num`` attribute of ``self`` instead of a local variable.
    """

    def __init__(self, num):
        super().__init__()
        self.num = num

    def run(self):
        while self.num > 0:
            self.num -= 1


# Guard the benchmark: with the "spawn" start method every child process
# re-imports this module, and unguarded start() calls would run again there.
if __name__ == "__main__":
    # 1) Two countdowns one after the other in the current process.
    t0 = time()
    count(100_000_000)
    count(100_000_000)
    print(time() - t0)

    # 2) Two plain Process objects running count() side by side.
    pr1 = Process(target=count, args=(100_000_000,))
    pr2 = Process(target=count, args=(100_000_000,))
    t0 = time()
    pr1.start(); pr2.start()
    pr1.join(); pr2.join()
    print(time() - t0)

    # 3) Two Proc subclasses counting on an instance attribute.
    pro1 = Proc(100_000_000)
    pro2 = Proc(100_000_000)
    t0 = time()
    pro1.start(); pro2.start()
    pro1.join(); pro2.join()
    print(time() - t0)

#casual time : 7.1526172161102295
#two processes time : 3.7792704105377197
#two extended processes time : 9.833416223526001
`
The extended Proc, while running, performs 100_000_000 + 100_000_000 accesses of the instance attribute self.num (in its run method). If you change Proc's run method to the following:
def run(self):
    """Count down from ``self.num`` using a local copy of the counter."""
    remaining = self.num  # read the attribute once; the loop only touches a local
    while remaining > 0:
        remaining -= 1
you'll get different timings. I got the following:
6.945512771606445
3.5630362033843994
3.5521087646484375
Related
I have been working for a few days to understand Redlock, and I have seen that acquiring the lock takes around 1 second, which seems a bit too long just to take a lock, but I could be wrong.
I have created a small script:
redis_test.py
import serialized_redis
from pottery import Redlock
# Module-level Redis client for the server on localhost:6379, database 0.
redis_connection = serialized_redis.MsgpackSerializedRedis(
    host='localhost',
    port=6379,
    db=0,
)


def lock(argument):
    """Return a ``Redlock`` for the key ``argument``.

    The lock is created with ``auto_release_time=120 * 1000``.
    NOTE(review): ``redis_connection`` is not passed to ``Redlock`` here, so
    confirm which Redis instance(s) the lock actually talks to.
    """
    release_after = 120 * 1000
    return Redlock(key=argument, auto_release_time=release_after)
main.py
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import math
import random
import sys
import time
from threading import Thread
from loguru import logger
from lib.redis_test import lock, redis_connection
class StopWatch:
    """Small wall-clock stopwatch based on ``time.time()``.

    The watch starts on construction and can be restarted with ``start()``.
    """

    def __init__(self):
        self.start()

    def start(self):
        """(Re)start the watch at the current time."""
        self._startTime = time.time()

    def getStartTime(self):
        """Return the timestamp stored by the most recent ``start()``."""
        return self._startTime

    def elapsed(self, prec=3):
        """Return the seconds since the last start, rounded to ``prec`` decimals.

        Any ``prec`` that is not an ``int`` (including ``None``) falls back to 3.
        """
        digits = prec if isinstance(prec, int) else 3
        seconds = time.time() - self._startTime
        return self.round(seconds, digits)

    def round(self, n, p=0):
        """Round ``n`` to ``p`` decimals, with halves going up."""
        scale = 10 ** p
        shifted = n * scale + 0.5
        return math.floor(shifted) / scale
def algorithm_with_lock(random_number):
    """Do the locked work for ``random_number`` unless its lock key already exists.

    Returns ``True`` after the work was done under the lock, and ``False``
    when the key ``redlock:<random_number>`` is already present in Redis.

    NOTE(review): the existence check and the lock acquisition are two
    separate steps, so two threads can both pass the check before either
    holds the lock - confirm whether that is acceptable.
    """
    print("Keys inside Redis", redis_connection.keys())

    timer = StopWatch()
    timer.start()

    lock_key = f'redlock:{random_number}'
    if redis_connection.exists(lock_key):
        return False

    # Up to this point the author measured about 0.0 seconds.
    with lock(f'{random_number}'):
        print("Time taken before redis_connection.exists", timer.elapsed())  # 1.002 seconds
        time.sleep(5)
        redis_connection.set("Hello_world", random.randint(1, 5))
        return True
def main():
    """Keep picking a random ``number_<1..3>`` key until the locked job succeeds.

    On success the result is logged and ``sys.exit()`` is called; otherwise
    the attempt is logged and the loop retries after one second.
    """
    while True:
        chosen_number = f"number_{random.randint(1, 3)}"
        if algorithm_with_lock(chosen_number):
            logger.info(f"Yay, finished my job! -> {chosen_number}")
            sys.exit()
        logger.debug(f"Trying new number! -> {chosen_number}")
        time.sleep(1)
# Launch the worker thread(s) running main(), pausing briefly after each start.
for _ in range(1):
    worker = Thread(target=main)
    worker.start()
    time.sleep(.1)
The issue is that it takes too long to actually lock a Redis key, so multiple threads can try to lock the same key and end up stuck in the lock tree. My guess is that it should not take 1 second to acquire a lock, but I could be wrong. What could be the reason for the long locking time, and is there a chance I'm using it incorrectly?
Sorry for the possible duplication of a question. Unfortunately, I could not find an answer that is convenient for me.
I want to write a class that measures the execution time of a piece of code (the mean and the RMS of the execution time), in the manner of %timeit in IPython.
For this, I used this code https://stackoverflow.com/a/28218696/5328802 to use in with statement.
Here is the test:
from TimingManager import TimingManager
import numpy as np
N = 10000  # number of elements squared by each benchmark function


def myfunction1():
    """Square ``range(N)`` with a list comprehension; the result is discarded."""
    squares = [i**2 for i in range(N)]


def myfunction2():
    """Square ``np.arange(N)`` as an array expression; the result is discarded."""
    squares = np.arange(N)**2


# The functions passed to TimingManager must be the ones defined above;
# the original referred to undefined names yourfunction1 / yourfunction2.
print("List test")
with TimingManager(fun=myfunction1, repeat=10) as t:
    t.start()

print("Array test")
with TimingManager(fun=myfunction2, repeat=10) as t:
    t.start()
And here is my realisation of TimingManager class (file TimingManager.py):
import timeit
import statistics
class TimingManager():
    """Context manager that times repeated calls of a function.

    ``start()`` calls ``fun`` ``repeat`` times and records each duration;
    leaving the ``with`` block prints the mean and the population standard
    deviation of the recorded durations.

    Example:
        from TimingManager import TimingManager
        with TimingManager(fun=yourfunction,repeat=10) as t:
            t.start()
    """

    # staticmethod keeps the timer callable through ``self.clock()`` even if
    # it is later replaced by a plain Python function (which would otherwise
    # be bound and receive ``self`` as an argument).
    clock = staticmethod(timeit.default_timer)

    def __init__(self, fun, repeat=10):
        """Store the callable ``fun`` and how many times ``start()`` runs it."""
        self.repeat = repeat
        self.time_table = []  # one duration in seconds per timed call
        self.run = fun

    def start(self):
        """Call the stored function ``repeat`` times, recording each duration."""
        for _ in range(self.repeat):
            self.timestart = self.clock()
            self.run()
            self.time_table.append(self.clock() - self.timestart)

    def __enter__(self):
        """ action on start """
        self.timestart = self.clock()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """ action on exit """
        self.print_process_stat()
        # Returning False lets an exception raised in the with-body propagate.
        return False

    def print_process_stat(self):
        """Print mean and population standard deviation of the timings."""
        if not self.time_table:
            # statistics.mean() raises on empty data; that would also hide an
            # exception raised in the with-body before anything was timed.
            print("Execution time is unknown: no timings were recorded")
            return
        tc = statistics.mean(self.time_table)
        dt = statistics.pstdev(self.time_table)
        print("Execution time is %s ± %s" % (time_conversion(tc), time_conversion(dt)))
def time_conversion(t):
    """Format a duration ``t`` (in seconds) as a short human-readable string.

    Durations below one minute are shown with three significant digits in
    ps, ns, μs, ms or s. Longer durations are split into two units:
    minutes/seconds, hours/minutes or days/hours.
    """
    if t < 1e-9:
        return "%4.3g ps" % (t/1e-12)
    if t < 1e-6:
        # Nanoseconds (this branch used to be labelled "ms").
        return "%4.3g ns" % (t/1e-9)
    if t < 1e-3:
        return "%4.3g μs" % (t/1e-6)
    if t < 1:
        return "%4.3g ms" % (t/1e-3)
    if t < 60:
        return "%4.3g s" % (t)
    if t < 3600:
        return "%2d min %2d s" % (t//60, t % 60)
    if t < 24*3600:
        return "%2d h %2d min" % (t//3600, (t//60) % 60)
    # Second field is the hours left over after whole days (previously the
    # leftover minutes were printed under the "h" label).
    return "%2d d %2d h" % (t//(24*3600), (t//3600) % 24)
It gives me statistics on the calculation time of what I need. But in my opinion this is not elegant. So my question is: is it possible to implement this procedure in a more elegant way (without defining myfunction), namely to access the block after the with statement (perhaps using the context manager) and repeat it inside the TimingManager class? In the end I would like to use something like this:
with TimingManager(repeat=10):
Statemet1
Statemet2
..
I have used a multiprocessing Pool to get some performance benefit over my sequential approach. However, the result is just the opposite: the Pool takes more time than the sequential version:
import multiprocessing as mp
import datetime
class A:
    """Benchmark harness comparing a multiprocessing Pool with a plain loop."""

    def __init__(self):
        # Filled by log_result() with one entry per finished task.
        self.result_list = []

    # parallel processing function
    def foo_pool(self, data):
        """Multiply the first element of every row in ``data`` by 10, in place."""
        for row in data:
            row[0] = row[0] * 10
        return data

    # sequential function
    def foo_seq(self, data):
        """Multiply the first element of the single row ``data`` by 10, in place."""
        data[0] = data[0] * 10
        return data

    def log_result(self, result):
        """Pool callback: store the value returned by one ``foo_pool`` task."""
        self.result_list.append(result)

    def apply_async_with_callback(self):
        """Build 100000 rows, process them in chunks of 1000 and print timings."""
        pool = mp.Pool(8)

        # Data Creation
        lst = [[i, i + 1, i + 2] for i in range(100000)]
        print('length of data ', len(lst))

        dtStart = datetime.datetime.now()
        print('start time:', str(datetime.datetime.now()))

        # Multiprocessing takes 2 secs
        for chunk in self.chunks(lst, 1000):
            pool.apply_async(
                self.foo_pool,
                args=(chunk,),
                callback=self.log_result,
            )

        # Sequential. It is 10x faster than pool
        # for d in lst:
        #     self.result_list.extend([self.foo_seq(d)])

        pool.close()
        pool.join()
        print('output data length:', len(self.result_list))

        dtEnd = datetime.datetime.now()
        print('end time:', str(datetime.datetime.now()))
        print('Time taken:', str(dtEnd - dtStart))

    # Divide big data into chunks
    def chunks(self, data, n):
        """Yield consecutive slices of ``data`` holding at most ``n`` items."""
        for offset in range(0, len(data), n):
            yield data[offset:offset + n]


if __name__ == '__main__':
    a = A()
    a.apply_async_with_callback()
In the above Python code, in apply_async_with_callback(), if you un-comment the sequential code and run it, the result comes back 10 times faster than with the multiprocessing Pool code.
Can someone help me understand what I am doing wrong?
Edit:
After applying the code provided in Why is multiprocessed code in given code taking more time than usual sequential execution?
sequential is now only 2 times faster than parallel processing code. Updated code below:
import multiprocessing as mp
import datetime
class A:
    """Benchmark harness that maps chunks of rows over a two-process Pool."""

    def __init__(self):
        self.result_list = []

    # parallel processing function
    def foo_pool(self, data):
        """Rewrite the first element of every row in ``data`` in place."""
        for row in data:
            head = row[0]
            row[0] = head * float(10) + 10 * (float(head) / 100)
        return data

    def log_result(self, result):
        """Store one result in ``result_list``."""
        self.result_list.append(result)

    def flatten(self, ll):
        """Concatenate the sub-sequences of ``ll`` into one new list."""
        return [item for sub in ll for item in sub]

    def square(self, x):
        """Return ``x`` multiplied by itself."""
        return x * x

    def squareChunk(self, chunk):
        """Pool worker: run ``foo_pool`` over one chunk."""
        return self.foo_pool(chunk)

    def apply_async_with_callback(self):
        """Build 1000000 rows, map them in chunks of 10000 and print timings."""
        # Data Creation
        lst = [[i, i + 1, i + 2] for i in range(1000000)]
        print('length of data ', len(lst))

        # split original list in decent sized chunks
        chunked = self.chunks(lst, 10000)
        pool = mp.Pool(2)

        dtStart = datetime.datetime.now()
        print('start time:', str(datetime.datetime.now()))

        mapped = pool.map(self.squareChunk, chunked)
        results = self.flatten(mapped)
        pool.close()
        pool.join()
        print('output data length:', len(results))

        dtEnd = datetime.datetime.now()
        print('end time:', str(datetime.datetime.now()))
        print('multi proc Time taken:', str(dtEnd - dtStart))

    def chunks(self, l, n):
        """Return a generator of slices of ``l`` with at most ``max(1, n)`` items."""
        size = max(1, n)
        return (l[start:start + size] for start in range(0, len(l), size))


if __name__ == '__main__':
    a = A()
    a.apply_async_with_callback()
I can see the difference from using Pool.map instead of Pool.apply_async. The code is faster now: earlier it was 10 times slower than the sequential version, now it is 2 times slower. But it is still slower.
Is this how multiprocessing behaves? Then what is the point of using multiprocessing? Or am I still doing something wrong?
Lets assume a simple method :
# Example target: returns the sums of range(1, 10000) and range(10000, 20000).
# One statement per line is kept on purpose, because the surrounding text
# refers to individual lines of this function by number.
def test_method():
    a = 1
    b = 10000
    c = 20000
    sum1 = sum(range(a, b))
    sum2 = sum(range(b, c))
    return sum1, sum2
To time this method using a decorator, a simple decorator would be :
from functools import wraps
def timed_decorator(f):
    """Wrap ``f`` so that every call logs its duration in milliseconds.

    The wrapper forwards all positional and keyword arguments, returns the
    value returned by ``f`` and logs ``f::<name> t::<ms> ms`` at debug level.
    """
    # Applied as a real decorator (it had degraded to the comment
    # "#wraps(f)"), so the wrapper keeps f's __name__ and __doc__.
    @wraps(f)
    def wrapper(*args, **kwds):
        start = time.time()
        result = f(*args, **kwds)
        elapsed = (time.time() - start)*1000  # seconds -> milliseconds
        logger.debug("f::{0} t::{1:0.2f} ms".format(f.__name__, elapsed))
        return result
    return wrapper
Now if I want to time specific lines of test_method say line 4 sum1 = sum(range(a,b)) , the current implementation involves inline coding like:
# Same example function with the timing of the first sum written inline.
def test_method():
    a = 1
    b = 10000
    c = 20000
    start = time.time()
    sum1 = sum(range(a,b)) # timing specific line or lines
    elapsed = (time.time() - start)*1000  # seconds -> milliseconds
    # Only one value is passed to format(), so the field index must be 0;
    # "{1:0.2f}" raised IndexError.
    logger.debug("This part took::{0:0.2f} ms".format(elapsed))
    sum2 = sum(range(b,c))
    return (sum1,sum2)
The intention is to use the decorator to time lines M to N of a specific method without modifying the code in the method.
Is it possible to inject such logic using a decorator ?
You can use a context manager.
import contextlib
@contextlib.contextmanager
def time_measure(ident):
    """Context manager that logs how long its ``with`` body took.

    ``ident`` is a label put in front of the measurement. The duration is
    logged at debug level in milliseconds once the body finishes normally.
    """
    tstart = time.time()
    yield
    # time.time() differences are seconds; convert so the "ms" label is true.
    elapsed = (time.time() - tstart) * 1000
    logger.debug("{0}: {1} ms".format(ident, elapsed))
In your code, you use it like
with time_measure('test_method:sum1'):
sum1 = sum(range(a, b))
By the way, if you want to improve your code, you can use the Gaussian Sum Formula (explained here) instead of sum(range(a, b)).
def sum_range(a, b):
    """Return ``sum(range(a, b))`` for integers in constant time.

    Uses the closed form 0 + 1 + ... + (k - 1) = k * (k - 1) / 2 for both
    bounds. An empty range (``b <= a``) gives 0, as ``sum(range(a, b))`` does.
    """
    if b <= a:
        return 0
    # k * (k - 1) is always even, so floor division is exact and the result
    # stays an int instead of becoming a (possibly rounded) float.
    return (b * (b - 1) - a * (a - 1)) // 2
Very simple solution with a custom context manager:
class elapsed:
    """Context manager that prints the duration of its ``with`` body in ms."""

    def __enter__(self):
        # Remember when the block was entered.
        self.start = time.time()

    def __exit__(self, *args):
        duration_ms = (time.time() - self.start) * 1000
        print("%.1f ms" % duration_ms)
Example usage:
with elapsed():
sum1 = sum(x ** 2 for x in range(1, 1000000))
# 547.0 ms
More about this: Decorator-like syntax for a specific line of code
Another solution: here is a slight variation of @NiklasR's answer that uses print instead of a logger, with a ready-to-run example:
import contextlib, time
@contextlib.contextmanager
def time_measure(ident):
    """Context manager that prints how long its ``with`` body took.

    ``ident`` is a label put in front of the measurement. The duration is
    printed in milliseconds once the body finishes normally.
    """
    tstart = time.time()
    yield
    # time.time() differences are seconds; convert so the "ms" label is true.
    elapsed = (time.time() - tstart) * 1000
    print("{0}: {1} ms".format(ident, elapsed))
with time_measure('hello'):
sum1 = sum(x ** 2 for x in range(1, 1000000))
# hello: 0.577033281326294 ms
One way I can think of is to use sys.settrace() and record time when handling "line" event in the tracer function. But one caveat is, the practice of setting a tracer may cause the time recorded to be inaccurate.
The general idea is:
Set a tracer function in the decorator that wraps the target method.
Get the line number for the first line of this method, with FLN = inspect.currentframe().f_lineno.
In the tracer function, handle "call" event and return a local tracer function to trace the "line" events in the scope. Read this if you are confused.
Within the local tracer function, get the current line number LN,
if LN-FLN == M, record the start time; if LN-FLN == N, record the end time, the time taken to execute lines M to N is endtime - starttime.
code:
import sys
from functools import wraps
import time
import linecache
# Tracer state shared between trace_calls() and the decorator that installs it.
_func_name_ = None  # name of the function whose lines are timed
_func_ln_ = 0       # line number seen at that function's entry
_start_ = 0         # line offset (from _func_ln_) where timing starts
_end_ = 0           # line offset (from _func_ln_) where timing stops
_timestamp_ = 0     # start time, replaced by the elapsed time at the end line


def trace_calls(frame, event, arg):
    """Global trace function: pick out calls of the function named ``_func_name_``.

    Returns a local trace function for matching "call" events and ``None``
    for everything else.
    """
    global _func_ln_

    def trace_lines(frame, event, arg):
        """Local trace function: time the span between the start and end offsets."""
        global _timestamp_
        if event != 'line':
            return
        line_no = frame.f_lineno
        filename = frame.f_code.co_filename
        offset = line_no - _func_ln_
        if offset == _start_:
            _timestamp_ = time.time()
            print("%d %s TS:%d" % (line_no, linecache.getline(filename, line_no)[:-1], _timestamp_))
        elif offset == _end_:
            _timestamp_ = time.time() - _timestamp_
            print("%d %s" % (line_no, linecache.getline(filename, line_no)[:-1]))
            # %.3f instead of %d: whole-second truncation reported 0 for
            # anything shorter than a second.
            print("Lines %d to %d of %s takes %.3f seconds." % (_start_, _end_, _func_name_, _timestamp_))

    if event != 'call':
        return
    if frame.f_code.co_name != _func_name_:
        return
    # Record the entry line only for the traced function; storing it before
    # the name check let any other call overwrite the reference line.
    _func_ln_ = frame.f_lineno
    return trace_lines
def time_lines(start, end):
    """Decorator factory: time lines ``start`` to ``end`` of the decorated function.

    The offsets are stored in module-level state when the decorator is
    created, and the tracer is installed only while the function runs.

    NOTE(review): the offsets are global, so the most recent ``time_lines``
    call wins if several functions are decorated - confirm this is intended.
    """
    global _start_, _end_
    _start_, _end_ = start+1, end+2 # function name takes a line, end is inclusive

    def inner(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            global _func_name_
            _func_name_ = f.__name__
            previous = sys.gettrace()
            sys.settrace(trace_calls)
            try:
                # Pass the result through; it used to be dropped.
                return f(*args, **kwargs)
            finally:
                # Put back whatever tracer was active, even if f raised.
                sys.settrace(previous)
        return wrapper
    return inner
# Demo target: body lines 2 to 4 (the three sleeps) are the timed span.
# The tracer works with line offsets, so no lines are added inside the function.
@time_lines(2,4)
def tested_func():
    print("Enter target function")
    time.sleep(2)
    time.sleep(1)
    time.sleep(3)
    print("Exit target function")


if __name__=="__main__":
    tested_func()
It's pretty ugly and not very stable code, but the only way I found to do this task is to exec the code of the function again after injecting the timing code.
Something like this:
import inspect
import re
import time
def inject_timer(f,n,m):
    """Rebuild ``f`` with timing statements injected around source lines n..m.

    The source of ``f`` is fetched with ``inspect``. A start-time assignment
    is inserted before source line ``n`` (line 0 is the ``def`` line), and an
    elapsed-time computation plus a ``print`` are inserted after line ``m``.
    The result is executed in this module's globals, which rebinds the name
    of ``f`` there. The rebuilt function is also returned.

    NOTE: this executes source text with ``exec`` - only use it on functions
    whose source you trust.
    """
    import textwrap  # local import: needed only to handle indented source

    codelines = inspect.getsourcelines(f)[0]
    # Reuse the indentation of line n for the injected statements.
    ident_lvl = re.search("^[ \t]*",codelines[n]).group(0)
    codelines.insert(n,ident_lvl + "start_longJibrishTo_preventCollision = time.time()\n")
    # Indices are shifted by the line inserted above.
    codelines.insert(m+2,ident_lvl + "elapsed_longJibrishTo_preventCollision = (time.time() - start_longJibrishTo_preventCollision)*1000\n")
    # repr() injects the name as a string literal; injecting the bare name
    # made the message show the function object instead of its name.
    codelines.insert(m+3,ident_lvl + """print("f::{0} t::{1:0.2f} ms".format(""" + repr(f.__name__) + """, elapsed_longJibrishTo_preventCollision))\n""")
    namespace = globals()
    # dedent lets functions defined inside another scope compile; the
    # function-call form of exec is accepted by Python 2 and Python 3.
    exec(textwrap.dedent("".join(codelines)), namespace)
    return namespace[f.__name__]
# Demo target for inject_timer(). The injection works on source-line indices
# (line 0 is the def line), so the body keeps exactly one statement per line
# and no lines are added inside the function.
def test_method():
    low = 1
    mid = 10000
    time.sleep(2)
    high = 20000
    first = sum(range(low, mid))
    second = sum(range(mid, high))
    return (first, second)


inject_timer(test_method, 3, 5)
A decorator can only decorate callables (e.g. functions, methods, classes). A single line or a group of lines are not callable as long as you do not wrap them in their own callable.
For timing a unit of your code you should choose an appropriate number of repetitions. The goal is to make sure that the execution time is longer than just a few micro or milliseconds, otherwise the measurement error will be too large.
Did you have a look at the timeit module?
Basically the more imports from different modules I include the longer these multiprocessing tasks take, even if none of the module functions are used. Is each process having to reimport everything or something? What is going on?
import time
time1 = time.time()
import multiprocessing as mp
import numpy as np # Random imports (not used)
import PIL
import PySide
import pandas
# print time.time() - time1 # here this prints 0.0
class Multi(object):
    """Run ``f`` in one child process and print the value it puts on a queue."""

    def __init__(self, queue):
        self.q = queue  # queue the child process reports its result on

    def run(self, a):
        """Start a process running ``f(a, queue)``, print its result and join it."""
        # Hand over the instance's own queue; the original reached for a
        # module-level ``q`` that exists only when run as a script.
        p = mp.Process(target=f, args=(a, self.q))
        p.start()
        print(self.q.get())
        p.join()
class MultiPool(object):
    """Run ``f1`` once per worker on a pool of ``N`` processes."""

    def __init__(self, N):
        self.N = N
        self.pool = mp.Pool(processes = self.N)

    def run(self):
        """Map ``f1`` over ``N`` one-element tuples and print the list of results."""
        result = self.pool.map_async(f1, ((i,) for i in range(self.N)))
        # Function-call form of print works on Python 2 and Python 3.
        print(result.get())
def f(a, q, n=10000000):
    """Loop over ``range(n)`` and put the last index on the queue ``q``.

    ``a`` is accepted but not used. ``n`` defaults to the original
    hard-coded iteration count. ``None`` is put on the queue when the loop
    does not run at all.
    """
    b = None
    for i in range(n):
        b = i
    q.put(b)
def f1(a, n=10000000):
    """Loop over ``range(n)`` and return the last index.

    ``a`` is accepted but not used. ``n`` defaults to the original
    hard-coded iteration count. ``None`` is returned when the loop does not
    run at all.
    """
    b = None
    for i in range(n):
        b = i
    return b
if __name__ == '__main__':
    q = mp.Queue()
    e = Multi(q)

    # 1) Plain call in this process. time1 was set at the top of the file,
    #    so this first figure is measured from before the imports.
    # time1 = time.time()
    print(f1(0))
    print(time.time() - time1)

    # 2) One child process reporting through the queue.
    time1 = time.time()
    e.run('123')
    print(time.time() - time1)

    # 3) Pool of two worker processes.
    time1 = time.time()
    mpool = MultiPool(2)
    mpool.run()
    print(time.time() - time1)
# Output with random imports:
>9999999
>0.246000051498
>9999999
>0.693000078201
>[9999999, 9999999]
>0.720999956131
# Output without imports:
>9999999
>0.246000051498
>9999999
>0.315999984741
>[9999999, 9999999]
>0.313999891281
Yes, multiprocessing must import everything in every process, because these are processes (new applications) and not threads.
What your script measures is the cost of executing the methods plus the cost of creating the processes. You can measure the cost of the imports separately; they are executed exactly where the import statements appear.