I wanted to know which of these counters is faster:
1)
from threading import Thread

def c(output, i):
    if i in output:
        output[i] += 1
    else:
        output[i] = 1

def build(itera):
    output = {}
    for i in itera:
        Thread(target=c, args=(output, i)).start()
    return output

2)
def build(itera):
    output = {}
    for i in itera:
        if i in output:
            output[i] += 1
        else:
            output[i] = 1
    return output

3)
from collections import Counter
Counter("12342")
And if there is any code that does the same thing but is faster than all three blocks, please tell me.
Use this to figure out which method takes the least amount of time:
import time

start = time.time()
# *** the code you want to test ***
end = time.time()
time_taken = round(end - start, 2)
print(time_taken)
The output is in seconds.
Using timeit:
import timeit

def do_something():
    pass

# pass the callable itself (or a statement string), not the result of calling it
print(timeit.timeit(do_something, number=10000))
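For example, applied to the three counter blocks from the question (a minimal sketch; build_threads, build_loop, and the test string are names made up here):

import timeit
from collections import Counter
from threading import Thread

def c(output, i):
    if i in output:
        output[i] += 1
    else:
        output[i] = 1

def build_threads(itera):  # block 1
    output = {}
    for i in itera:
        Thread(target=c, args=(output, i)).start()
    return output

def build_loop(itera):  # block 2
    output = {}
    for i in itera:
        if i in output:
            output[i] += 1
        else:
            output[i] = 1
    return output

data = "12342" * 100  # hypothetical test input

# Thread creation dominates block 1, so keep the repeat count low.
print(timeit.timeit(lambda: build_threads(data), number=10))
print(timeit.timeit(lambda: build_loop(data), number=10))
print(timeit.timeit(lambda: Counter(data), number=10))

Expect block 1 to be slowest by a wide margin: starting a thread per character costs far more than the counting itself, and since nothing joins the threads before build returns, the counts can also come back wrong. In CPython, Counter does its counting in a C helper, so it is usually the fastest of the three.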
Add this into your code, and see which runs quicker:
from datetime import datetime
start = datetime.now()
#code here
end = datetime.now()
total = end - start
print(total.total_seconds())
Related
I am trying to use the multiprocessing library to compare the performance of my processor on 1 core vs 2 cores.
Therefore I calculate a large product using 1 loop, 2 loops on 1 core, and 2 loops on 2 cores (1 core/loop). The problem is that the values of D1.result and D2.result are 0, although each is expected to be the product of its half of the list.
The code is the following:
import random
import time
from multiprocessing import Process as Task, freeze_support

N = 10 ** 3
l = [random.randint(2 ** 999, 2 ** 1000 - 1) for x in range(N)]

# ---------------------------------------------------------------
class Loop:
    def __init__(self):
        self.result = 0

    def boucle(self, start, end):
        self.result = l[start]
        for v in l[start + 1:end]:
            self.result = self.result * v

# ---------------------------------------------------------------
if __name__ == "__main__":
    print("1 Loop without multiprocessing")
    A = Loop()
    sta = time.time()
    ra = A.boucle(0, N)
    end = time.time()
    print("--> Time :", end - sta)

    # ----------------------------------------------------------------------
    print("2 Loops without multiprocessing")
    B1 = Loop()
    B2 = Loop()
    sta = time.time()
    rb1 = B1.boucle(0, N // 2)
    rb2 = B2.boucle(N // 2, N)
    rb = B1.result * B2.result
    end = time.time()
    print("--> Time :", end - sta)
    if rb - A.result == 0:
        check = "OK"
    else:
        check = "fail"
    print("--> Check :", check)

    # ----------------------------------------------------------------------
    print("2 Loops with multiprocessing")
    freeze_support()
    D1 = Loop()
    D2 = Loop()
    v1 = Task(target=D1.boucle, args=(0, N // 2))
    v2 = Task(target=D2.boucle, args=(N // 2, N))
    sta = time.time()
    v1.start()
    v2.start()
    v1.join()
    v2.join()
    rd = D1.result * D2.result
    end = time.time()
    print("D1", D1.result)
    print("D2", D2.result)
    print("--> Time :", end - sta)
    if rd - A.result == 0:
        check = "OK"
    else:
        check = "fail"
    print("--> Check :", check)
The result of this code is:
1 Loop without multiprocessing
--> Time : 0.5025153160095215
2 Loops without multiprocessing
--> Time : 0.283463716506958
--> Check : OK
2 Loops with multiprocessing
D1 0
D2 0
--> Time : 0.2579989433288574
--> Check : fail
Process finished with exit code 0
Why are D1 and D2 0 and not the result of the loop?
Thank you!
The issue with this code is shown when D1 and D2 are displayed:
In multiprocessing, tasks are executed in a forked process. That process gets a copy of the data.
In each forked process the value is properly computed, but it is never sent back to the main process.
To work around this you can:
Use shared memory to store the result, but in this case you are limited to C types. Your numbers do not fit in 64 bits (the largest C integer size), so this is not a good solution here (a minimal sketch of this option follows below).
Use a pool of processes; data will then be exchanged over queues, and you can pass real Python objects.
This last option requires that the "boucle" function return the result.
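For completeness, a minimal sketch of the shared-memory option, with made-up small inputs so each partial product fits in 64 bits (the 'q' type code is a signed 64-bit C integer):

from multiprocessing import Process, Value

# Deterministic small numbers so each partial product fits in 64 bits.
nums = list(range(2, 22))

def partial_product(shared, start, end):
    result = 1
    for v in nums[start:end]:
        result *= v
    shared.value = result  # written into shared memory, visible to the parent

if __name__ == "__main__":
    r1 = Value('q', 0)  # 'q' = signed 64-bit C integer
    r2 = Value('q', 0)
    p1 = Process(target=partial_product, args=(r1, 0, 10))
    p2 = Process(target=partial_product, args=(r2, 10, 20))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    print(r1.value * r2.value)  # the final multiply is ordinary Python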
Here is the code for the pool version:

import random
import time
from multiprocessing import freeze_support, Pool

N = 10 ** 3
l = [random.randint(2 ** 999, 2 ** 1000 - 1) for x in range(N)]

# ---------------------------------------------------------------
class Loop:
    def __init__(self):
        self.result = 0

    def boucle(self, start, end):
        self.result = l[start]
        for v in l[start + 1:end]:
            self.result = self.result * v
        return self.result

# ---------------------------------------------------------------
if __name__ == "__main__":
    print("1 Loop without multiprocessing")
    A = Loop()
    sta = time.time()
    ra = A.boucle(0, N)
    end = time.time()
    print("--> Time :", end - sta)

    # ----------------------------------------------------------------------
    print("2 Loops without multiprocessing")
    B1 = Loop()
    B2 = Loop()
    sta = time.time()
    rb1 = B1.boucle(0, N // 2)
    rb2 = B2.boucle(N // 2, N)
    rb = B1.result * B2.result
    end = time.time()
    print("--> Time :", end - sta)
    if rb - A.result == 0:
        check = "OK"
    else:
        check = "fail"
    print("--> Check :", check)

    # ----------------------------------------------------------------------
    print("2 Loops with multiprocessing")
    freeze_support()
    D1 = Loop()
    D2 = Loop()
    with Pool(processes=2) as pool:
        sta = time.time()
        rb1 = pool.apply_async(D1.boucle, (0, N // 2))
        rb2 = pool.apply_async(D2.boucle, (N // 2, N))
        v1 = rb1.get()  # the returned values travel back over the pool's queues
        v2 = rb2.get()
        rd = v1 * v2
        end = time.time()
    print("D1", D1.result)  # still 0: the workers mutated copies of D1/D2
    print("D2", D2.result)
    print("--> Time :", end - sta)
    if rd - A.result == 0:
        check = "OK"
    else:
        check = "fail"
    print("--> Check :", check)
And the result:
1 Loop without multiprocessing
--> Time : 0.3473360538482666
2 Loops without multiprocessing
--> Time : 0.18696999549865723
--> Check : OK
2 Loops with multiprocessing
D1 0
D2 0
--> Time : 0.1116642951965332
--> Check : OK
You can also use map with the pool to get the values back, but I have not tried it in this case because you only call 2 functions. Pool workers take tasks in batches (the batching knob here is map's chunksize argument), so it could be that a single worker takes both tasks for itself.
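A minimal sketch of that map variant, assuming the same module-level l and N as above and a plain function instead of a method (chunksize=1 asks the pool to hand out one task at a time):

from multiprocessing import Pool

def boucle_range(bounds):
    # plain-function version of boucle, so it maps cleanly over tuples
    start, end = bounds
    result = l[start]
    for v in l[start + 1:end]:
        result = result * v
    return result

if __name__ == "__main__":
    with Pool(processes=2) as pool:
        parts = pool.map(boucle_range, [(0, N // 2), (N // 2, N)], chunksize=1)
    rd = parts[0] * parts[1]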
I am trying to run multiprocessing in Python for the first time, but when I debug in PyCharm I see that Thread 8 is waiting for a lock, and I believe this is where my code is slowing down. I thought about mapping the process pool inside the while loop, but that seems redundant. Am I on the right track with this?
import random
import time
import math
import multiprocessing as mp  # alias
# from multiprocessing.dummy import Pool as ThreadPool

# initial sequence:
# HHTHTHHHTHHHTHTH
coinChoices = ["H", "T"]  # choices
idealFlip = "HHTHTHHH"    # string to match
flip = ""                 # resets flip
margin_error = 0.1        # accuracy
num_matches = 0.0         # matched strings
probability = 0           # calc. probability
counter = 0               # iterations
flag = 1                  # exit token
secondFlag = 1
check = math.ceil(pow(2, len(idealFlip)) / 2)  # used for printing prob

# flips a coin *NOT USED*
def flip_coin(coins):
    return str(random.choice(coins))

# requests (num) tasks to be completed
def flip_coin_num(num):
    return str(random.choice(coinChoices))

# theoretical probability
def compute_probability():
    size = len(idealFlip)
    return math.pow(0.5, size)

# actual probability
def empirical_probability(count, num_flips):
    return count / num_flips

# TODO: implement multiprocessing
if __name__ == "__main__":
    # print("# cores: %d" % mp.cpu_count())
    probability = compute_probability()
    print("\nInitial probability of landing on the sequence: " + str(probability) + "\n")
    actualProb = 0
    empiricalProb = 0
    tasks = range(len(idealFlip))
    pool = mp.Pool(processes=4)
    while flag != 0 or counter == 1000:
        temp = pool.map(flip_coin_num, tasks)
        # add other processes?
        # handles close / join
        flip = "".join(temp)
        # print(temp)
        # print(flip)
        if counter != 0:
            empiricalProb = empirical_probability(num_matches, counter)
        if flip == idealFlip:
            num_matches += 1
        counter += 1
        flip = ""
        if counter % check == 0:
            print("Probability: " + str(empiricalProb))
I am creating a graphing program which has to iterate values through a calculation 10,000-1,000,000 times and then append part of that output to a list. In order to change which list it is appended to, there are ~3 if statements inside that loop. While it would logically be faster to do the if check once, outside the loop, is a significant amount of time actually saved?
As an example:
output = []
append_to = "pol"
for i in range(10000):
    if append_to == "pol":
        output.append(np.cos(i))
    else:
        output.append(np.sin(i))
Would this be significantly slower than:
output = []
append_to = "pol"
if append_to == "pol":
    for i in range(10000):
        output.append(np.cos(i))
else:
    for i in range(10000):
        output.append(np.sin(i))
Why not just try it?
import numpy as np
import timeit

def one():
    output = []
    append_to = "pol"
    for i in range(10000):
        if append_to == "pol":
            output.append(np.cos(i))
        else:
            output.append(np.sin(i))

def two():
    output = []
    append_to = "pol"
    if append_to == "pol":
        for i in range(10000):
            output.append(np.cos(i))
    else:
        for i in range(10000):
            output.append(np.sin(i))

print(timeit.timeit('f()', 'from __main__ import one as f', number=1000))
print(timeit.timeit('f()', 'from __main__ import two as f', number=1000))
Output:
9.042721510999854
8.626055914000062
So yes, it is faster, as expected. And just so you know, the attribute lookup also takes a bit of time, so if you do ap = output.append and then call ap instead of output.append, you get a marginal additional improvement.
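A minimal sketch of that lookup trick applied to the faster version (the function name here is made up):

def two_cached():
    output = []
    append_to = "pol"
    ap = output.append  # cache the bound method once instead of looking it up per iteration
    if append_to == "pol":
        for i in range(10000):
            ap(np.cos(i))
    else:
        for i in range(10000):
            ap(np.sin(i))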
Give it a try!
import math, time

time_start = time.time()
output = []
append_to = "pol"
for i in range(10000000):
    if append_to == "pol":
        output.append(math.cos(i))
    else:
        output.append(math.sin(i))
print("End: " + str(time.time() - time_start))
For that run, I got 4.278s. For this run:
import math, time

time_start = time.time()
output = []
append_to = "pol"
if append_to == "pol":
    for i in range(10000000):
        output.append(math.cos(i))
else:
    for i in range(10000000):
        output.append(math.sin(i))
print("End: " + str(time.time() - time_start))
I got 3.751s.
So there you go!
I have the following code, which turns an outlet on/off every 3 seconds:

import time
import snmpy4  # the SNMP module this code relies on

start_time = time.time()
counter = 0
agent = snmpy4.Agent("192.168.9.50")
while True:
    if counter % 2 == 0:
        agent.set("1.3.6.1.4.1.13742.6.4.1.2.1.2.1.1", 1)
    else:
        agent.set("1.3.6.1.4.1.13742.6.4.1.2.1.2.1.1", 0)
    time.sleep(3 - ((time.time() - start_time) % 3))
    counter = counter + 1
Is there a way I can have the loop terminate at any given point if something is entered (space, for example), while letting the code above keep running in the meantime?
You can put the loop in a thread and use the main thread to wait on the keyboard. If it is okay for "something to be entered" to be a line ending with a line feed (e.g., type a command and press Enter), then this will do:
import time
import threading
import sys

def agent_setter(event):
    start_time = time.time()
    counter = 0
    # agent = snmpy4.Agent("192.168.9.50")
    while True:
        if counter % 2 == 0:
            print('agent.set("1.3.6.1.4.1.13742.6.4.1.2.1.2.1.1", 1)')
        else:
            print('agent.set("1.3.6.1.4.1.13742.6.4.1.2.1.2.1.1", 0)')
        # wait() doubles as the 3-second sleep; it returns True if the
        # event was set from the main thread in the meantime
        if event.wait(3 - ((time.time() - start_time) % 3)):
            print('got keyboard')
            event.clear()
        counter = counter + 1

agent_event = threading.Event()
agent_thread = threading.Thread(target=agent_setter, args=(agent_event,))
agent_thread.start()

for line in sys.stdin:
    agent_event.set()
Reading the documentation: https://docs.python.org/2/library/multiprocessing.html
I decided to write a CPU-intensive program and compare multiprocessing with serial computation.
First of all, if this library is using multiprocessing, why do I only see one python.exe process?
Secondly, why does the serial computation take 12 seconds while the multiprocessed one takes 22 seconds?
serial code:
from datetime import datetime

def calc_fib(ind):
    fb = 1
    if ind >= 3:
        prev = 1
        i = 2
        while i < ind:
            prev_tmp = fb
            fb += prev
            prev = prev_tmp
            i += 1
    return fb

def long_calc_fib(ind):
    val = 0
    for j in range(500):
        val = calc_fib(ind)
    return val

if __name__ == "__main__":
    t1 = datetime.now()
    for i in range(10):
        tmp = long_calc_fib(10000)
    t2 = datetime.now()
    print(t2 - t1)
multiprocessing pool code:
from datetime import datetime
from multiprocessing.pool import ThreadPool

def calc_fib(ind):
    fb = 1
    if ind >= 3:
        prev = 1
        i = 2
        while i < ind:
            prev_tmp = fb
            fb += prev
            prev = prev_tmp
            i += 1
    return fb

def long_calc_fib(ind):
    val = 0
    for j in range(500):
        val = calc_fib(ind)
    return val

if __name__ == "__main__":
    t1 = datetime.now()
    pool = ThreadPool(processes=10)
    async_results = []
    for i in range(10):
        async_results.append(pool.apply_async(long_calc_fib, (10000,)))
    for res in async_results:
        tmp = res.get()
    t2 = datetime.now()
    print(t2 - t1)
My mistake.
I should have used Pool instead of ThreadPool: ThreadPool runs the tasks in threads inside a single process (which is why only one python.exe shows up), and for CPU-bound code the GIL keeps those threads from running in parallel, so the extra coordination only adds overhead.
By changing ThreadPool to Pool, I reduced the time to 3 seconds.
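A minimal sketch of that change (calc_fib and long_calc_fib unchanged from above):

from datetime import datetime
from multiprocessing import Pool  # real worker processes instead of threads

def calc_fib(ind):
    fb = 1
    if ind >= 3:
        prev = 1
        i = 2
        while i < ind:
            prev_tmp = fb
            fb += prev
            prev = prev_tmp
            i += 1
    return fb

def long_calc_fib(ind):
    val = 0
    for j in range(500):
        val = calc_fib(ind)
    return val

if __name__ == "__main__":
    t1 = datetime.now()
    # Each worker is a separate process with its own GIL, so the 10
    # calls can run on multiple cores at once.
    with Pool(processes=10) as pool:
        async_results = [pool.apply_async(long_calc_fib, (10000,))
                         for i in range(10)]
        for res in async_results:
            tmp = res.get()
    t2 = datetime.now()
    print(t2 - t1)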