I am creating a graphing program which has to iterate values through a calculation 10,000-1,000,000 times and then append part of that output to a list. To change which list the output is appended to, there are ~3 if statements inside that loop. While it would logically be faster to do the if check before the loop, is there a significant amount of time saved?
As an example:
output = []
append_to = "pol"
for i in range(10000):
    if append_to == "pol":
        output.append(np.cos(i))
    else:
        output.append(np.sin(i))
Would this be significantly slower than:
output = []
append_to = "pol"
if append_to == "pol":
    for i in range(10000):
        output.append(np.cos(i))
else:
    for i in range(10000):
        output.append(np.sin(i))
Why don't you just try it?
import numpy as np
import timeit

def one():
    output = []
    append_to = "pol"
    for i in range(10000):
        if append_to == "pol":
            output.append(np.cos(i))
        else:
            output.append(np.sin(i))

def two():
    output = []
    append_to = "pol"
    if append_to == "pol":
        for i in range(10000):
            output.append(np.cos(i))
    else:
        for i in range(10000):
            output.append(np.sin(i))

print(timeit.timeit('f()', 'from __main__ import one as f', number=1000))
print(timeit.timeit('f()', 'from __main__ import two as f', number=1000))
Output:
9.042721510999854
8.626055914000062
So yes, it is faster, as expected. And just so you know, the output.append attribute lookup also takes a bit of time, so if you do ap = output.append and then call ap instead of output.append, you get a marginal improvement.
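For instance, a minimal sketch of that bound-method trick (the function names are just illustrative; numbers will vary by machine):

import timeit

def with_lookup():
    output = []
    for i in range(10000):
        output.append(i)  # attribute lookup on every iteration

def with_bound_method():
    output = []
    ap = output.append  # resolve the attribute once, outside the loop
    for i in range(10000):
        ap(i)

print(timeit.timeit(with_lookup, number=1000))
print(timeit.timeit(with_bound_method, number=1000))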
Give it a try!
import math, time

time_start = time.time()
output = []
append_to = "pol"
for i in range(10000000):
    if append_to == "pol":
        output.append(math.cos(i))
    else:
        output.append(math.sin(i))
print("End: " + str(time.time() - time_start))
For that run, I got 4.278s. For this run:
import math, time

time_start = time.time()
output = []
append_to = "pol"
if append_to == "pol":
    for i in range(10000000):
        output.append(math.cos(i))
else:
    for i in range(10000000):
        output.append(math.sin(i))
print("End: " + str(time.time() - time_start))
I got 3.751s.
So there you go!
I wanted to know which of these counters is faster:
1)
from threading import Thread

def c(output, i):
    if i in output:
        output[i] += 1
    else:
        output[i] = 1

def build(itera):
    output = {}
    for i in itera:
        Thread(target=c, args=(output, i)).start()
    return output
2)
def build(itera):
    output = {}
    for i in itera:
        if i in output:
            output[i] += 1
        else:
            output[i] = 1
    return output
3)
from collections import Counter
Counter("12342")
And if there is any code that does the same thing but is faster than all three blocks of code, please tell me.
Use this to figure out which method takes the least amount of time:
import time

start = time.time()
# *** the code you want to test goes here ***
end = time.time()
time_taken = round(end - start, 2)
print(time_taken)
The output is in seconds.
Using timeit:
import timeit

def do_something():
    pass

print(timeit.timeit(do_something, number=1000))
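As a rough sketch, here is how timeit could be applied to the question's counters; I only time the dict-based build and Counter, since starting a thread per element would dominate the measurement, and the input string is just an example:

import timeit
from collections import Counter

def build(itera):
    output = {}
    for i in itera:
        if i in output:
            output[i] += 1
        else:
            output[i] = 1
    return output

data = "12342" * 1000  # example input; any iterable of hashable items works

print(timeit.timeit(lambda: build(data), number=1000))
print(timeit.timeit(lambda: Counter(data), number=1000))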
Add this into your code, and see which runs quicker:
from datetime import datetime
start = datetime.now()
#code here
end = datetime.now()
total = end - start
print(total.total_seconds())
I want to print list data with specific delays that are stored in another list. I want to loop this process for a specific time, but I'm unable to implement it in a thread.
from time import sleep
import datetime

now = datetime.datetime.now()
Start_Time = datetime.datetime.now()
Str_time = Start_Time.strftime("%H:%M:%S")
End_Time = '11:15:00'

class sampleTest:
    @staticmethod
    def test():
        list1 = ["Hello", "Hi", "Ola"]
        list2 = [5, 10, 7]
        # print(f"{data} delay {delay} & time is {t} ")
        # sleep(delay)
        i = 0
        while i < len(list1):
            t = datetime.datetime.now().strftime('%H:%M:%S')
            print(f"{list1[i]} delay {list2[i]} & time is {t} ")
            sleep(list2[i])
            i += 1
        else:
            print("All Data is printed")

if __name__ == '__main__':
    obj = sampleTest
    while Str_time < End_Time:
        obj.test()
        Str_time = datetime.datetime.now().strftime("%H:%M:%S")
    else:
        print("Time Is done")
Expected output: on the first loop it should print all the list data, but from the second loop onward it should run as per the delays.
1st time: Hello, Hi, Ola
after that
1. Every 5 seconds it should print Hello
2. Every 10 seconds it should print Hi
3. Every 7 seconds it should print Ola
Actual output: the list data is getting printed one item after another as per the delays:
Hello delay 5 & time is 11:41:45
Hi delay 10 & time is 11:41:50
Ola delay 3 & time is 11:42:00
All Data is printed
Hello delay 5 & time is 11:42:03
Hi delay 10 & time is 11:42:08
Ola delay 3 & time is 11:42:18
You can try comparing the current time with the start time, for example:
time.sleep(1)
diff = int(time.time() - start_time)
if diff % wait_time == 0:
    print(text_to_print)
Here is the full code implementing this:
from time import sleep
import time
import datetime

now = datetime.datetime.now()
Start_Time = datetime.datetime.now()
Str_time = Start_Time.strftime("%H:%M:%S")
End_Time = '11:15:00'
starttime = time.time()
diff = 0

class sampleTest:
    @staticmethod
    def test():
        list1 = ["Hello", "Hi", "Ola"]
        list2 = [5, 10, 7]
        for i in range(len(list1)):
            if diff % list2[i] == 0:
                t = datetime.datetime.now().strftime('%H:%M:%S')
                print(f"{list1[i]} delay {list2[i]} & time is {t} ")

if __name__ == '__main__':
    obj = sampleTest
    while Str_time < End_Time:
        obj.test()
        time.sleep(1)
        diff = int(time.time() - starttime)
        Str_time = datetime.datetime.now().strftime("%H:%M:%S")
    else:
        print("Time Is done")
In accordance with your desired output, I believe threads are the best option, which means:
from time import sleep
import datetime
import threading

now = datetime.datetime.now()
Start_Time = datetime.datetime.now()
Str_time = Start_Time.strftime("%H:%M:%S")
End_Time = '11:15:00'

class sampleTest:
    def __init__(self):
        self.run = True
        print("1st time: Hello, Hi, Ola")
        print("Now: " + datetime.datetime.now().strftime('%H:%M:%S'))

    def test(self, i):
        list1 = ["Hello", "Hi", "Ola"]
        list2 = [5, 10, 7]
        while self.run:
            sleep(list2[i])
            t = datetime.datetime.now().strftime('%H:%M:%S')
            print(f"{list1[i]} delay {list2[i]} & time is {t}")

    def stop(self):
        self.run = False

if __name__ == '__main__':
    obj = sampleTest()
    t1 = threading.Thread(target=obj.test, args=(0,))
    t2 = threading.Thread(target=obj.test, args=(1,))
    t3 = threading.Thread(target=obj.test, args=(2,))
    t1.start()
    t2.start()
    t3.start()
    while Str_time < End_Time:
        Str_time = datetime.datetime.now().strftime("%H:%M:%S")
    else:
        obj.stop()
    t1.join()
    t2.join()
    t3.join()
    print("All data is printed")
    print("Time Is done")
I need a loop to restart because I'm limited in how many elements I can process before Google blocks me when using the googletrans module.
I've worked out that I can get about fifty elements using a random time delay before Google blocks me, but I need it to loop through about 850.
As far as I know there is no way to restart a loop, so I tried a while loop, but it doesn't seem to update and it finishes after the first block has been processed.
I am also randomly setting an interval of a few seconds between translations to keep the loop working. It goes from 0 to 50 and then stops the loop.
My code:
from googletrans import Translator
from random import randint
import datetime
import time

should_restart = True
spanish_subs = get_subs(page)  # list of over 850 sentences to be translated
english_subs = []
counter_num = 1
translator = Translator()
start_block = 0
end_block = 50
while should_restart:
    print('start_block ' + str(start_block))  # see where the loop is in the process
    print('end_block ' + str(end_block))
    if end_block < len(get_subs(page)):
        translations = translator.translate(spanish_subs[start_block:end_block], src='es')
        for translation in translations:
            english_subs.append(translation.text)
        print('Loop ' + str(counter_num + 1))
        time.sleep(random())  # pauses between 0 and 10 seconds
    if end_block >= len(get_subs(page)):
        should_restart = False
        with open('englist_translation.txt', 'w') as f:
            for item in english_subs:
                f.write("%s\n" % item)
        print('Finished')
    start_block = end_block + 50
    end_block = end_block + 50  # update the end block
    print(english_subs)  # print to console to see what was translated
return english_subs
def random():
    random_number = randint(0, 10)
    return random_number
This setup will help you get past Google's limits. It is a slow process, but it works on a list of 50,000 characters.
from random import randint
import time
from googletrans import Translator

def get_script_eng():
    should_restart = True
    spanish_subs = get_subs(page)
    english_subs = []  # collect translated sentences here
    counter_num = 1
    translator = Translator()
    start_block = 0
    end_block = 50
    while end_block < len(get_subs(page)):
        print('start_block ' + str(start_block))
        print('end_block ' + str(end_block))
        if should_restart:
            translations = translator.translate(spanish_subs[start_block:end_block], src='es')
            for translation in translations:
                english_subs.append(translation.text)
            time.sleep(random())
            print('translation ' + str(counter_num + 1))
        else:
            should_restart = False
            with open('spanish.txt', 'w') as f:
                for item in english_subs:
                    f.write("%s\n" % item)
            print('Finished')
        start_block = start_block + 50
        end_block = end_block + 50
    print(english_subs)
    return english_subs
And then to generate the random time delay
def random():
    random_number = randint(0, 10)
    return random_number
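One caveat: randint(0, 10) can return 0, so occasionally there is no pause at all before the next request. If you want to guarantee at least a one-second delay, a small sketch (my suggestion, not part of the original code) using random.uniform would be:

from random import uniform

def random_delay():
    # return a float between 1 and 10 so time.sleep() always pauses for at least 1 second
    return uniform(1, 10)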
I'm looking for a faster way of sampling a single element at random from a large Python set. Below I've benchmarked three obvious approaches. Is there a faster way of doing this?
import random
import time
test_set = set(["".join(["elem-", str(l)]) for l in range(0, 1000000)])
t0 = time.time()
random_element = random.choice(list(test_set))
print(time.time() - t0)
t0 = time.time()
random_element = random.sample(test_set, 1)
print(time.time() - t0)
t0 = time.time()
rand_idx = random.randrange(0, len(test_set)-1)
random_element = list(test_set)[rand_idx]
print(time.time() - t0)
Output:
0.0692291259765625
0.06741929054260254
0.07094502449035645
You could use numpy and add it to your benchmarks.
import numpy

random_num = numpy.random.randint(0, 1000000)
element = 'elem-' + str(random_num)
test_array = numpy.array([x for x in test_set])
Specifically, this is a piece of code that benchmarks the different methods:
random_choice_times = []
random_sample_times = []
random_randrange_times = []
numpy_choice_times = []

for i in range(0, 10):
    t0 = time.time()
    random_element = random.choice(list(test_set))
    time_elps = time.time() - t0
    random_choice_times.append(time_elps)

    t0 = time.time()
    random_element = random.sample(test_set, 1)
    time_elps = time.time() - t0
    random_sample_times.append(time_elps)

    t0 = time.time()
    rand_idx = random.randrange(0, len(test_set)-1)
    random_element = list(test_set)[rand_idx]
    time_elps = time.time() - t0
    random_randrange_times.append(time_elps)

    t0 = time.time()
    random_num = numpy.random.choice(numpy.array(test_array))
    time_elps = time.time() - t0
    numpy_choice_times.append(time_elps)

print("Avg time for random.choice: ", sum(random_choice_times) / 10)
print("Avg time for random.sample: ", sum(random_sample_times) / 10)
print("Avg time for random.randrange: ", sum(random_randrange_times) / 10)
print("Avg time for numpy.choice: ", sum(numpy_choice_times) / 10)
Here are the times
>>> Avg time for random.choice: 0.06497154235839844
>>> Avg time for random.sample: 0.06054067611694336
>>> Avg time for random.randrange: 0.05938301086425781
>>> Avg time for numpy.choice: 0.017636775970458984
You could try this.
from time import perf_counter

def random_set_ele(set_: set):
    copy = set_
    return copy.pop()

test_set = set(["".join(["elem-", str(l)]) for l in range(0, 1000000)])

start = perf_counter()
print(random_set_ele(test_set))
print(perf_counter() - start)
Result:
elem-57221
0.00016391400276916102
The .pop() method removes and returns an arbitrary element from the set. Note that copy = set_ only binds another name to the same set rather than making a copy, so the popped element really is removed from the original; call set_.copy() first if you need to preserve it, at the cost of copying the whole set.
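If you need to draw many random elements from the same set rather than just one, a common alternative (a sketch under that assumption, not a benchmark) is to pay the conversion cost once and reuse the resulting sequence:

import random

test_set = set("elem-" + str(l) for l in range(1000000))

as_tuple = tuple(test_set)  # one-time O(n) conversion

for _ in range(10):
    # each draw after the conversion is O(1)
    print(random.choice(as_tuple))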
I wrote 3 different scripts to compare using threads vs. not using threads, basically to measure how much time I save by using threading, and the results didn't make any sense.
Here is my code:
import time

def Function():
    global x
    x = 0
    while x < 300000000:
        x += 1
    print x

e1 = time.clock()
E1 = time.time()
Function()
e2 = time.clock()
E2 = time.time()
print e2 - e1
print E2 - E1
When I ran this, I got this as output:
26.6358742929
26.6440000534
Then I wrote another version, shown below, and split counting up to 300 million into three counts of 100 million each:
import time

def Function():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

def Function2():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

def Function3():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

e1 = time.clock()
E1 = time.time()
Function()
Function2()
Function3()
e2 = time.clock()
E2 = time.time()
print e2 - e1
print E2 - E1
The output of this version was:
26.0577638729
26.0629999638
and lastly I created 3 threads and ran each function on its own thread:
import time
import threading

e1 = time.clock()
E1 = time.time()

def Function1():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

def Function2():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

def Function3():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

new_thread1 = threading.Thread(target = Function1() , args = ())
new_thread2 = threading.Thread(target = Function2(), args = ())
new_thread3 = threading.Thread(target = Function3(), args = ())
e1 = time.clock()
E1 = time.time()
new_thread1.start()
new_thread2.start()
new_thread3.start()
e2 = time.clock()
E2 = time.time()
print e2 - e1
print E2 - E1
The output of this one was:
0.000601416222253
0.0
These numbers make no sense to me. I'm just trying to measure how much time threading saves me. I've looked it up in the documentation, and using time.time and time.clock made sense to me, but it doesn't make sense here. Also, the actual time for the 1st and 2nd snippets was about 10 seconds, and the 3rd one about 5.
You are calling it wrong:
new_thread1 = threading.Thread(target = Function1 , args = ())
Note that you should not CALL the function when you create the thread; pass the function object itself.
Those timings really mean nothing: they are both essentially zero, because all you are timing is three calls to start(), which return immediately.
Note that to get the output you will need to wait for each thread to finish (since your current code does not do this).
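A minimal sketch of the corrected pattern, assuming the Function1/Function2/Function3 defined in the question:

import time
import threading

# Function1, Function2, Function3 as defined in the question above

new_thread1 = threading.Thread(target=Function1)  # pass the function object, do not call it
new_thread2 = threading.Thread(target=Function2)
new_thread3 = threading.Thread(target=Function3)

E1 = time.time()
new_thread1.start()
new_thread2.start()
new_thread3.start()
new_thread1.join()  # wait for each thread to finish before stopping the clock
new_thread2.join()
new_thread3.join()
E2 = time.time()
print(E2 - E1)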
EDIT FOR MORE INFO
With threading you are locked by the GIL to one Python instruction at a time. Typically this is not a problem, since you are usually waiting on disk I/O. Your example code, however, is 100% computation, so threading really doesn't improve your time. Multiprocessing may, as demonstrated below:
import time
import threading
import multiprocessing

def fn():
    '''since all 3 functions were identical you can just use one ...'''
    x = 0
    while x < 100000000:
        x += 1

def TEST_THREADS():
    new_thread1 = threading.Thread(target=fn, args=())
    new_thread2 = threading.Thread(target=fn, args=())
    new_thread3 = threading.Thread(target=fn, args=())
    new_thread1.start()
    new_thread2.start()
    new_thread3.start()
    new_thread1.join()
    new_thread2.join()
    new_thread3.join()

def TEST_NORMAL():
    fn()
    fn()
    fn()

def TEST_MULTIPROCESSING():
    new_thread1 = multiprocessing.Process(target=fn, args=())
    new_thread2 = multiprocessing.Process(target=fn, args=())
    new_thread3 = multiprocessing.Process(target=fn, args=())
    new_thread1.start()
    new_thread2.start()
    new_thread3.start()
    new_thread1.join()
    new_thread2.join()
    new_thread3.join()  # note the parentheses: join must be called

if __name__ == "__main__":
    '''It is very important to use the __name__ == "__main__" guard with threads and multiprocessing'''
    import timeit
    print "Time to Run 1x: %0.2fs" % (timeit.timeit(fn, number=1),)
    print "NORMAL: %0.2fs" % (timeit.timeit(TEST_NORMAL, number=1),)
    print "Threaded: %0.2fs" % (timeit.timeit(TEST_THREADS, number=1),)
    print "Multiprocessing: %0.2fs" % (timeit.timeit(TEST_MULTIPROCESSING, number=1),)
I get the following output
Time to Run 1x: 3.71181102665
NORMAL:11.0136830117
Threaded: 23.392143814
Multiprocessing: 3.80878260515