I wrote 3 different codes to compare having threads vs. not having threads. Basically measuring how much time I save by using threading and the result didn't make any sense.
Here are my codes:
import time
def Function():
global x
x = 0
while x < 300000000:
x += 1
print x
e1 = time.clock()
E1 = time.time()
Function()
e2 = time.clock()
E2 = time.time()
print e2 - e1
print E2 - E1
When I ran this, I got this as output:
26.6358742929
26.6440000534
Then I wrote another function as shown below and split counting up to 300 million into counting 3, 100 millions:
import time
def Function():
global x
x = 0
while x < 100000000:
x += 1
print x
def Function2():
global x
x = 0
while x < 100000000:
x += 1
print x
def Function3():
global x
x = 0
while x < 100000000:
x += 1
print x
e1 = time.clock()
E1 = time.time()
Function()
Function2()
Function3()
e2 = time.clock()
E2 = time.time()
print e2 - e1
print E2 - E1
The output of the following function was:
26.0577638729
26.0629999638
and lastly I created 3 threads and ran each function on a single thread:
import time
import threading
e1 = time.clock()
E1 = time.time()
def Function1():
global x
x = 0
while x < 100000000:
x += 1
print x
def Function2():
global x
x = 0
while x < 100000000:
x += 1
print x
def Function3():
global x
x = 0
while x < 100000000:
x += 1
print x
new_thread1 = threading.Thread(target = Function1() , args = ())
new_thread2 = threading.Thread(target = Function2(), args = ())
new_thread3 = threading.Thread(target = Function3(), args = ())
e1 = time.clock()
E1 = time.time()
new_thread1.start()
new_thread2.start()
new_thread3.start()
e2 = time.clock()
E2 = time.time()
print e2 - e1
print E2 - E1
The out put of this one was:
0.000601416222253
0.0
These numbers make no sense to me. I'm just trying to measure how much time does threading save me. I've looked up in the documentation and using time.time
and time.clock made sense to me, but it doesn't make sense here. Also, the actual time for 1st and 2nd snippet were about 10 seconds and 3rd one about 5
you are calling it wrong ....
new_thread1 = threading.Thread(target = Function1 , args = ())
note that you should not CALL the function when you create the thread
those timings really mean nothing they are both essentially zero because all you are timing is 3 instant return function calls to start
note to get the output you will need to wait for each thread to finish (since your current code does not do this )
EDIT FOR MORE INFO
with threading you are locked by the gil to one python instruction at a time... typically this is not a problem since you are usually waiting on disk io... In your example code however it is 100% computation so threading really doesnt improve your time ... Multiprocessing may as demonstrated below
import time
import threading
import multiprocessing
def fn():
'''since all 3 functions were identical you can just use one ...'''
x = 0
while x < 100000000:
x += 1
def TEST_THREADS():
new_thread1 = threading.Thread(target = fn , args = ())
new_thread2 = threading.Thread(target = fn, args = ())
new_thread3 = threading.Thread(target = fn, args = ())
new_thread1.start()
new_thread2.start()
new_thread3.start()
new_thread1.join()
new_thread2.join()
new_thread3.join()
def TEST_NORMAL():
fn()
fn()
fn()
def TEST_MULTIPROCESSING():
new_thread1 = multiprocessing.Process(target = fn , args = ())
new_thread2 = multiprocessing.Process(target = fn, args = ())
new_thread3 = multiprocessing.Process(target = fn, args = ())
new_thread1.start()
new_thread2.start()
new_thread3.start()
new_thread1.join()
new_thread2.join()
new_thread3.join
if __name__ == "__main__":
'''It is very important to use name == __main__ guard code with threads and multiprocessing'''
import timeit
print "Time to Run 1x: %0.2fs"%(timeit.timeit(fn,number=1),)
print "NORMAL:%0.2fs"%(timeit.timeit(TEST_NORMAL,number=1),)
print "Threaded: %0.2fs"%(timeit.timeit(TEST_THREADS,number=1),)
print "Multiprocessing: %0.2fs"%(timeit.timeit(TEST_MULTIPROCESSING,number=1),)
I get the following output
Time to Run 1x: 3.71181102665
NORMAL:11.0136830117
Threaded: 23.392143814
Multiprocessing: 3.80878260515
Related
I have a function that factor a number. It depends on some random condition.
So what I am trying to do it's to run multiple processors in this function and the processor that finds the factor first returns the value and all processors terminate.
What I have so far is very wrong. The processors are not terminating and I also don't know how to get the value that was returned by the function
flag = False
def rho(n, processor):
while True:
x = random.randrange(1, n-1)
x2 = x
gcd = 1
c = random.randrange(1, n-1)
while gcd == 1:
x = (x**2 + c) % n
x2 = (x2**2 + c) % n
x2 = (x2**2 + c) % n
gcd = math.gcd(abs(x - x2), n)
if gcd != n:
flag = True
print("Factor was found from "+process+" and is ", gcd)
return gcd
if __name__ == "__main__":
p1 = multiprocessing.Process(target=rho, args=(91, "process 1" ))
p2 = multiprocessing.Process(target=rho, args=(91, "process 2"))
p1.start()
p2.start()
if flag:
p1.terminate()
p2.terminate()
The output is:
Factor was found from process 2 and is 13
Factor was found from process 1 and is 7
You can use multiprocessing.Pool and it's methods map(), imap_unordered() etc. These will return also values from worker functions.
Example (I used time.sleep() to simulate some time-intesive computation):
from time import sleep
from multiprocessing import Pool
def rho(params):
n, processor = params
# your computation here
# ...
sleep(n)
print("Factor was found from " + processor + " and is 42")
return 42
if __name__ == "__main__":
with Pool() as pool:
for result in pool.imap_unordered(
rho, ((10, "process 1"), (1, "process 2"))
):
print("Result I got:", result)
break # <-- I don't want other results, so break
Prints:
Factor was found from process 2 and is 42
Result I got: 42
EDIT: Two different functions:
from time import sleep
from multiprocessing import Pool
def fn1(n, p):
sleep(n)
print("Factor was found from " + p + " and is 42")
return 42
def fn2(n, p):
sleep(n)
print("Factor was found from " + p + " and is 99")
return 99
def rho(params):
what_to_call, n, processor = params
return what_to_call(n, processor)
if __name__ == "__main__":
with Pool() as pool:
for result in pool.imap_unordered(
rho, ((fn1, 10, "process 1"), (fn2, 1, "process 2"))
):
print("Result I got:", result)
break # <-- I don't want other results, so break
I am trying to use multiprocesing library to compare the performance of my processor on 1 core vs 2 cores.
Therefore I calculate a great product using 1 loop, 2 loops on 1 core, and 2 loops on 2 cores (1 core/loop). The problem is that the value of D1.result and D2.result are 0 although they are expected to be the product of the "half/loop".
The code is the following:
import random
from multiprocessing import Process as Task, freeze_support
N = 10 ** 3
l = [random.randint(2 ** 999, 2 ** 1000 - 1) for x in range(N)]
# ---------------------------------------------------------------
class Loop:
def __init__(self):
self.result=0
def boucle(self,start,end):
self.result = l[start]
for v in l[start+1:end]:
self.result = self.result*v
# ---------------------------------------------------------------
if __name__=="__main__":
print("1 Loop without multiprocessing")
A=Loop()
sta = time.time()
ra=A.boucle(0,N)
end = time.time()
print("--> Time :", end - sta)
#----------------------------------------------------------------------
print("2 Loops without multiprocessing")
B1=Loop()
B2=Loop()
sta = time.time()
rb1 = B1.boucle(0, N//2)
rb2 = B2.boucle(N//2, N)
rb = B1.result*B2.result
end = time.time()
print("--> Time :", end - sta)
if rb - A.result == 0 :
check="OK"
else :
check="fail"
print("--> Check :", check)
# ----------------------------------------------------------------------
print("2 Loops with multiprocessing")
freeze_support()
D1=Loop()
D2=Loop()
v1 = Task(target=D1.boucle, args=(0,N//2))
v2 = Task(target=D2.boucle, args=(N//2,N))
sta = time.time()
v1.start()
v2.start()
v1.join()
v2.join()
rd = D1.result*D2.result
end = time.time()
print("D1",D1.result)
print("D2",D2.result)
print("--> Time :", end - sta)
if rd - A.result == 0 :
check="OK"
else :
check="fail"
print("--> Check :", check)
The result of this code is :
1 Loop without multiprocessing
--> Time : 0.5025153160095215
2 Loops without multiprocessing
--> Time : 0.283463716506958
--> Check : OK
2 Loops with multiprocessing
D1 0
D2 0
--> Time : 0.2579989433288574
--> Check : fail
Process finished with exit code 0
Why D1 0 and D2 0 and not the result of the loop ?
Thanks you !
The issue with this code is shown when D1 and D2 are displayed:
In multiprocessing, tasks are executed in a forked process. This process got a copy of the data.
In each forked process the value is properly computed but it is never sent back to main process.
To work around this you can:
Use shared memory to store the result, but in this case you are limited to C types. Your numbers do not fit on 64 bits (max integer size in C), so this is not a good solution.
Use a pool of process, thus data will be shared using queues and you will be able to manage real python types.
This last option requires that "boucle" function returns the result.
Here is the code:
import random
from multiprocessing import Process as Task, freeze_support, Pool
import time
N = 10 ** 3
l = [random.randint(2 ** 999, 2 ** 1000 - 1) for x in range(N)]
# ---------------------------------------------------------------
class Loop:
def __init__(self):
self.result = 0
def boucle(self, start, end):
self.result = l[start]
for v in l[start + 1:end]:
self.result = self.result * v
return self.result
# ---------------------------------------------------------------
if __name__ == "__main__":
print("1 Loop without multiprocessing")
A = Loop()
sta = time.time()
ra = A.boucle(0, N)
end = time.time()
print("--> Time :", end - sta)
# ----------------------------------------------------------------------
print("2 Loops without multiprocessing")
B1 = Loop()
B2 = Loop()
sta = time.time()
rb1 = B1.boucle(0, N // 2)
rb2 = B2.boucle(N // 2, N)
rb = B1.result * B2.result
end = time.time()
print("--> Time :", end - sta)
if rb - A.result == 0:
check = "OK"
else:
check = "fail"
print("--> Check :", check)
# ----------------------------------------------------------------------
print("2 Loops with multiprocessing")
freeze_support()
D1 = Loop()
D2 = Loop()
pool = Pool(processes=2)
with pool:
sta = time.time()
sta = time.time()
rb1 = pool.apply_async(B1.boucle, (0, N // 2))
rb2 = pool.apply_async(B2.boucle, (N // 2, N))
v1 = rb1.get()
v2 = rb2.get()
rd = v1 * v2
end = time.time()
print("D1", D1.result)
print("D2", D2.result)
print("--> Time :", end - sta)
if rd - A.result == 0:
check = "OK"
else:
check = "fail"
print("--> Check :", check)
And the result:
1 Loop without multiprocessing
--> Time : 0.3473360538482666
2 Loops without multiprocessing
--> Time : 0.18696999549865723
--> Check : OK
2 Loops with multiprocessing
D1 0
D2 0
--> Time : 0.1116642951965332
--> Check : OK
You can also use map with the pool to get the value back, but I have not tried it in this case because you only call 2 functions, and pool workers get tasks by "packets of functions - see maxtaskperchild" so it could be possible that only one worker will have taken the 2 functions for itself
I am a beginner and am getting this error when I run my code in main_odroid.py:
'NameError: name 'alarmthresh5' is not defined'
When i run this code:
from threading import Thread
def func3():
Import main_odroid
global alarmthresh5
alarmthresh5 = 1.45 * stddev2
[...]
if floats[1] > alarmthresh5:
wpi.digitalWrite(1, 1)
[...]
Thread(target = func3).start()
[...]
[...] means the rest of the (irrelevant) code that I excluded for clarity.
I used the search function and tried different answers but it did not solve the problem.
Thanks I and I am interested in your answers.
EDIT: scope of variable alarmthresh5 is global i think, because I typed 'global alarmthresh5' in func3.
EDIT2: The entire code
import subprocess
from subprocess import Popen, PIPE
import os
import odroid_wiringpi as wpi
from collections import deque
import time
import datetime
from threading import Thread
from math import sqrt
sysdamping = 10
freqdamping = 14
totalbins = 2000
#Functions
def average(p): return sum(p) / float(len(p))
def variance(p): return list(map(lambda x: (x - average(p))**2, p))
def std_dev(p): return sqrt(average(variance(p)))
wpi.wiringPiSetup()
wpi.pinMode(1, 1)
wpi.pinMode(5, 1)
wpi.pinMode(27, 1)
wpi.pinMode(31, 1)
wpi.pinMode(23, 1)
wpi.pinMode(11, 1)
wpi.digitalWrite(23, 0)
wpi.digitalWrite(11, 0)
wpi.pinMode(2, 1)
wpi.pinMode(7, 1)
wpi.digitalWrite(2, 0)
wpi.digitalWrite(7, 0)
wpi.pinMode(9, 1)
wpi.digitalWrite(9, 0)
time.sleep(0.1)
wpi.pullUpDnControl(30, 2)
os.system("killall -9 soapy_power -q")
time.sleep(0.1)
def func2():
exec(open("/home/odroid/rtl-power-fftw/build/deactiveer.py").read())
def func3():
import main_odroid
global alarmthresh1
global alarmthresh2
global alarmthresh3
global alarmthresh4
global alarmthresh5
global Lijst
alarmthresh1 = (average(rolling[i]) + 1.45 * stddev2)
alarmthresh2 = alarmthresh1 + 2
alarmthresh3 = alarmthresh2 + 2
alarmthresh4 = alarmthresh3 + 2
alarmthresh5 = alarmthresh4 + 2
Lijst = [381843000.0, 382242000.0, 382245000.0, 382248000.0, 382254000.0, 382257000.0, 382305000.0, 382308000.0, 382320000.0, 382323000.0, 382326000.0, 382389000.0, 382392000.0, 382557000.0, 382560000.0, 382671000.0, 383157000.0, 383454000.0, 383457000.0, 383469000.0, 383472000.0, 383997000.0, 384000000.0, 384003000.0, 384006000.0]
def func4():
import main_odroid
global stddev2
stddev2 = std_dev(sweep)
def func1():
cmd = ["/home/odroid/.local/bin/soapy_power", "-f", "433M", "-r", "6M", "--force-rate", "-q", "-c", "-b", "2000", "-F", "rtl_power_fftw"]
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, bufsize=1)
rolling = []
i = 0
rolling_avg = deque([])
sweep = deque([], maxlen=1500)
stddev = 12
totalbins = 2000
# Let's see what's going on with rtl_power_fftw
for line in iter(p.stdout.readline, b''):
#Ignore garbage output
if not (b'#' in line or not line.strip()):
floats = list(map(float, line.split()))
# Create 2D array
if len(rolling) < totalbins:
rolling.append(deque([]))
rolling[i].append(floats[1])
sweep.append(floats[1])
# Let's start filtering...
if len(rolling[i]) >= freqdamping:
rolling[i].popleft()
print("ref 5")
input_state3 = wpi.digitalRead(9)
if floats[1] > alarmthresh5 and 380820000.0 < floats[0] < 384570000.0 and floats[0] not in Lijst: # Led 5
wpi.digitalWrite(1, 1)
if input_state3 == 0:
wpi.digitalWrite(9, 1) # Buzzer proxy
wpi.digitalWrite(31, 1) # Buzzer ON
print(round(floats[0]), round(floats[1]), alarmthresh5, "ref2")
Thread(target = func2).start()
if floats[1] > alarmthresh1 and floats[0] in [380310000.0, 381444000.0, 381579000.0, 381780000.0, 381783000.0, 382110000.0, 382836000.0, 382881000.0, 382884000.0, 382887000.0, 382890000.0, 382893000.0, 383037000.0, 383085000.0, 383340000.0, 383460000.0, 384258000.0, 384261000.0, 384264000.0]:
if input_state3 == 0:
wpi.digitalWrite(9, 1) # Buzzer proxy
wpi.digitalWrite(31, 1) # Buzzer ON
print(round(floats[0]), round(floats[1]), alarmthresh1, "ref1")
Thread(target = func2).start()
if floats[1] > alarmthresh4 and 380820000.0 < floats[0] < 384570000.0 and floats[0] not in Lijst: # Led 4
wpi.digitalWrite(7, 1)
if floats[1] > alarmthresh3 and 380820000.0 < floats[0] < 384570000.0 and floats[0] not in Lijst: # Led 3
wpi.digitalWrite(23, 1)
if floats[1] > alarmthresh2 and 380820000.0 < floats[0] < 384570000.0 and floats[0] not in Lijst: # Led 2
wpi.digitalWrite(5, 1)
if floats[1] > alarmthresh1 and 380820000.0 < floats[0] < 384570000.0 and floats[0] not in Lijst: # Led 1
input_state = wpi.digitalRead(27)
if input_state == 0:
wpi.digitalWrite(27, 1)
Thread(target = func2).start()
# Increment or reset indexer (i)
if i < totalbins - i: i = i + 1
else:
i = 0
Thread(target = func4).start() # Set stddev2
Thread(target = func3).start() # Set alarmthresh1 - alarmthresh5
print("ref 6")
p.stdout.close()
p.wait()
Thread(target = func1).start()
Ref 6 prints before ref 5, so func3 is started before alarmthresh5 is called.
Can you specify scope of variable alarmthresh5. Try to add variable in function definition only, something like this: def func3(alarmthresh5=valueyouwantotput)
I have the following code which turns an outlet on/off every 3 seconds.
start_time = time.time()
counter = 0
agent = snmpy4.Agent("192.168.9.50")
while True:
if (counter % 2 == 0):
agent.set("1.3.6.1.4.1.13742.6.4.1.2.1.2.1.1",1)
else:
agent.set("1.3.6.1.4.1.13742.6.4.1.2.1.2.1.1", 0)
time.sleep(3- ((time.time()-start_time) % 3))
counter = counter + 1
Is there a way I can have the loop terminate at any given point if something is entered, (space) for example... while letting the code above run in the mean time
You can put the loop in a thread and use the main thread to wait on the keyboard. If its okay for "something to be entered" can be a line with line feed (e.g., type a command and enter), then this will do
import time
import threading
import sys
def agent_setter(event):
start_time = time.time()
counter = 0
#agent = snmpy4.Agent("192.168.9.50")
while True:
if (counter % 2 == 0):
print('agent.set("1.3.6.1.4.1.13742.6.4.1.2.1.2.1.1",1)')
else:
print('agent.set("1.3.6.1.4.1.13742.6.4.1.2.1.2.1.1", 0)')
if event.wait(3- ((time.time()-start_time) % 3)):
print('got keyboard')
event.clear()
counter = counter + 1
agent_event = threading.Event()
agent_thread = threading.Thread(target=agent_setter, args=(agent_event,))
agent_thread.start()
for line in sys.stdin:
agent_event.set()
Reading the documentation: https://docs.python.org/2/library/multiprocessing.html
I decided to write a cpu intensive code and compare multiprocessing with serial computation.
First of all, if this library is using multiprocessing, then why I only see 1 python.exe process?
Secondly, why serial computation takes 12 seconds while multiprocessed one takes 22 seconds?
serial code:
from datetime import datetime
def calc_fib(ind):
fb = 1
if ind >= 3:
prev = 1
i = 2
while i < ind:
prev_tmp = fb
fb += prev
prev = prev_tmp
i += 1
return fb
def long_calc_fib(ind):
val = 0
for j in range(500):
val = calc_fib(ind)
return val
if __name__ == "__main__":
t1 = datetime.now()
for i in range(10):
tmp = long_calc_fib(10000)
t2 = datetime.now()
print str(t2 - t1)
multiprocessing pool code:
from datetime import datetime
from multiprocessing.pool import ThreadPool
def calc_fib(ind):
fb = 1
if ind >= 3:
prev = 1
i = 2
while i < ind:
prev_tmp = fb
fb += prev
prev = prev_tmp
i += 1
return fb
def long_calc_fib(ind):
val = 0
for j in range(500):
val = calc_fib(ind)
return val
if __name__ == "__main__":
t1 = datetime.now()
pool = ThreadPool(processes=10)
async_results = []
for i in range(10):
async_results.append(pool.apply_async(long_calc_fib, (10000,)))
for res in async_results:
tmp = res.get()
t2 = datetime.now()
print str(t2 - t1)
My mistake.
I must have used Pool instead of ThreadPool.
By chaning ThreadPool to Pool, I reduced the time to 3 seconds.