callback does not work in pool.map_async() - python

In the following simple program, the callback passed to pool.map_async() does not seem to work properly. Could someone point out what is wrong?
import os
import multiprocessing
import time

def cube(x):
    return "{}^3={}".format(x, x**3)

def prt(value):
    print(value)

if __name__ == "__main__":
    pool = multiprocessing.Pool(3)
    start_time = time.perf_counter()
    result = pool.map_async(cube, range(1, 1000), callback=prt)
    finish_time = time.perf_counter()
    print(f"Program finished in {finish_time-start_time} seconds")
$ python3 /var/tmp/cube_map_async_callback.py
Program finished in 0.0001492840237915516 seconds
$
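For what it's worth, the likely cause: map_async() returns immediately, and the script reaches its end (and exits) before any worker finishes, so the callback never gets a chance to run. A minimal sketch of a fix, assuming the intent is to wait for the results, is to close and join the pool before exiting:

import multiprocessing
import time

def cube(x):
    return "{}^3={}".format(x, x**3)

def prt(value):
    # Called once, in the parent process, with the complete list of results.
    print(value)

if __name__ == "__main__":
    pool = multiprocessing.Pool(3)
    start_time = time.perf_counter()
    result = pool.map_async(cube, range(1, 1000), callback=prt)
    pool.close()  # no more tasks will be submitted
    pool.join()   # block until all workers finish, so the callback can fire
    finish_time = time.perf_counter()
    print(f"Program finished in {finish_time-start_time} seconds")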

Related

Why is asyncio not reducing the overall execution time in Python and running functions concurrently?

I am trying to run a piece of code using asyncio to reduce the overall execution time. Below is my code, which takes around 6 seconds to fully execute.
Normal function calls- (approach 1)
from time import time, sleep
import asyncio

def find_div(range_, divide_by):
    lis_ = []
    for i in range(range_):
        if i % divide_by == 0:
            lis_.append(i)
    print("found numbers for range {}, divided by {}".format(range_, divide_by))
    return lis_

if __name__ == "__main__":
    start = time()
    find_div(50800000, 341313)
    find_div(10005200, 32110)
    find_div(50000340, 31238)
    print(time()-start)
The output of the above code is just the total execution time, which is about 6 seconds.
Multithreaded Approach- (approach 2)
I used multithreading here, but surprisingly the time increased.
from time import time, sleep
import asyncio
import threading

def find_div(range_, divide_by):
    lis_ = []
    for i in range(range_):
        if i % divide_by == 0:
            lis_.append(i)
    print("found numbers for range {}, divided by {}".format(range_, divide_by))
    return lis_

if __name__ == "__main__":
    start = time()
    t1 = threading.Thread(target=find_div, args=(50800000, 341313))
    t2 = threading.Thread(target=find_div, args=(10005200, 32110))
    t3 = threading.Thread(target=find_div, args=(50000340, 31238))
    t1.start()
    t2.start()
    t3.start()
    t1.join()
    t2.join()
    t3.join()
    print(time()-start)
The output of the above code is 12 seconds.
Multiprocessing approach- (approach 3)
from time import time, sleep
import asyncio
from multiprocessing import Pool

def multi_run_wrapper(args):
    return find_div(*args)

def find_div(range_, divide_by):
    lis_ = []
    for i in range(range_):
        if i % divide_by == 0:
            lis_.append(i)
    print("found numbers for range {}, divided by {}".format(range_, divide_by))
    return lis_

if __name__ == "__main__":
    start = time()
    with Pool(3) as p:
        p.map(multi_run_wrapper, [(50800000, 341313), (10005200, 32110), (50000340, 31238)])
    print(time()-start)
The output of the multiprocessing code is 3 seconds, which is better than the plain function call approach.
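As a side note, on Python 3 the multi_run_wrapper shim can be dropped in favor of Pool.starmap, which unpacks each argument tuple itself:

from multiprocessing import Pool

if __name__ == "__main__":
    with Pool(3) as p:
        p.starmap(find_div, [(50800000, 341313), (10005200, 32110), (50000340, 31238)])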
Asyncio Approach- (approach 4)
from time import time, sleep
import asyncio

async def find_div(range_, divide_by):
    lis_ = []
    for i in range(range_):
        if i % divide_by == 0:
            lis_.append(i)
    print("found numbers for range {}, divided by {}".format(range_, divide_by))
    return lis_

async def task():
    tasks = [find_div(50800000, 341313), find_div(10005200, 32110), find_div(50000340, 31238)]
    result = await asyncio.gather(*tasks)
    print(result)

if __name__ == "__main__":
    start = time()
    asyncio.run(task())
    print(time()-start)
The above code also takes around 6 seconds, the same as the plain function calls in approach 1.
Problem-
Why is my asyncio approach not working as expected and reducing the overall time? What is wrong with the code?
You have code that exclusively uses the CPU.
Code like this cannot be sped up using async.
Async shines when you have tasks that are waiting on something not CPU related, e.g. a network request or reading from disk. This is generally true for all languages.
In Python, the thread-based approach will not help either, as it still restricts you to a single core rather than true parallel execution. This is due to the Global Interpreter Lock (GIL). The overhead of starting and switching between threads makes it slower than the simple version.
In this regard, threads are similar to async in Python: they only help if the waiting time is not spent mainly on the CPU, or if you are calling code that is not bound by the GIL, e.g. C extensions.
Multiprocessing really does use multiple CPU cores, so it is faster than the sequential solution.
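If you want to keep the asyncio structure but still use multiple cores for CPU-bound work, one common pattern (a sketch, not part of the original answer) is to push the calls into a ProcessPoolExecutor via loop.run_in_executor:

import asyncio
from concurrent.futures import ProcessPoolExecutor
from time import time

def find_div(range_, divide_by):
    # Plain (non-async) CPU-bound function, same logic as in the question.
    return [i for i in range(range_) if i % divide_by == 0]

async def main():
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor() as pool:
        # Each call runs in its own process, so the GIL is not a bottleneck.
        tasks = [
            loop.run_in_executor(pool, find_div, 50800000, 341313),
            loop.run_in_executor(pool, find_div, 10005200, 32110),
            loop.run_in_executor(pool, find_div, 50000340, 31238),
        ]
        results = await asyncio.gather(*tasks)
        print([len(r) for r in results])

if __name__ == "__main__":
    start = time()
    asyncio.run(main())
    print(time() - start)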
import asyncio

async def run(seconds):
    await asyncio.sleep(seconds)
This code takes 1 minute 40 seconds, because each sleep is awaited sequentially (run inside a coroutine, or a notebook cell that allows top-level await):

from datetime import datetime

now = datetime.now()
for i in range(10):
    await run(10)
print(datetime.now() - now)
Optimized using async, this takes only 10 seconds, because all ten sleeps run concurrently:

from datetime import datetime

now = datetime.now()
tasks = []
for i in range(10):
    tasks.append(asyncio.create_task(run(10)))
await asyncio.gather(*tasks)
print(datetime.now() - now)

Python concurrent.futures trying to import functions

So I have two .py files and am trying to import the test function from the first into the second one. But every time I try, I just get a "BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending." error. I have no idea what I'm messing up; help is very much appreciated.
parallel.py:
import time
from concurrent import futures

def test(t):
    time.sleep(t)
    print("I waited {} seconds. Time {:.0f}".format(t, time.time()))

def main():
    print("Start time: {:.0f}".format(time.time()))
    start = time.perf_counter()
    with futures.ThreadPoolExecutor(max_workers=3) as ex:
        ex.submit(test, 9)
        ex.submit(test, 4)
        ex.submit(test, 5)
        ex.submit(test, 6)
        print("All tasks started.")
    print("All tasks done.")
    finish = time.perf_counter()
    print("Finished in", round(finish-start, 2), "second(s)")

if __name__ == "__main__":
    main()
parallel2.py:
import parallel
import time
import concurrent.futures

# =============================================================================
# def test(t):
#     time.sleep(t)
#     return ("I waited {} seconds. Time {:.0f}".format(t, time.time()))
# =============================================================================

def main():
    print("Start time: {:.0f}".format(time.time()))
    start = time.perf_counter()
    with concurrent.futures.ProcessPoolExecutor() as executor:
        f1 = executor.submit(parallel.test, 9)
        f2 = executor.submit(parallel.test, 5)
        f3 = executor.submit(parallel.test, 4)
        f4 = executor.submit(parallel.test, 6)
        print(f1.result())
        print(f2.result())
        print(f3.result())
        print(f4.result())
    finish = time.perf_counter()
    print("Finished in", round(finish-start, 2), "second(s)")

if __name__ == "__main__":
    main()
Try this solution:
Remove the condition if __name__ == "__main__" from parallel.py.
You put the condition if __name__ == "__main__" in both scripts to execute the main function.
With this condition, a script checks whether it is the main module and runs the function only if that check is true.
When you import a script from another one, its __name__ is no longer "__main__", so the condition is not satisfied and the function does not run.
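A minimal illustration of that behavior (hypothetical file names):

# mymodule.py (hypothetical)
print("__name__ in mymodule:", __name__)

if __name__ == "__main__":
    print("running as a script")

# main.py (hypothetical)
import mymodule  # prints: __name__ in mymodule: mymodule
                 # the guarded block does NOT run on import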

How can I measure execution time of a Python program (functional structure)?

I need to measure the execution time of a Python program having the following structure:
import numpy
import pandas

def func1():
    code

def func2():
    code

if __name__ == '__main__':
    func1()
    func2()
If I want to use time.time(), where should I put the calls in the code? I want to get the execution time for the whole program.
Alternative 1:
import time
start = time.time()

import numpy
import pandas

def func1():
    code

def func2():
    code

if __name__ == '__main__':
    func1()
    func2()

end = time.time()
print("The execution time is", end - start)
Alternative 2:
import numpy
import pandas

def func1():
    code

def func2():
    code

if __name__ == '__main__':
    import time
    start = time.time()
    func1()
    func2()
    end = time.time()
    print("The execution time is", end - start)
On Linux, you can run this file test.py using the time command:
time python3 test.py
After your program runs, it will give you output like the following:
real 0m0.074s
user 0m0.004s
sys 0m0.000s
This link will tell you the difference between the three times you get.
The whole program:
import time
t1 = time.time()

import numpy
import pandas

def func1():
    code

def func2():
    code

if __name__ == '__main__':
    func1()
    func2()
    t2 = time.time()
    print("The execution time is", t2 - t1)

executing a specific statement at a given rate in python

I want to write code which executes a statement a specified number of times per second.
Many of you might be familiar with the term rate.
Here I want the rate to be 30 per second:
say I want to execute a function 30 times per second for 60 seconds,
meaning rate = 30/sec and duration = 60 sec.
Can anyone tell me whether there is an API available in Python to do this?
The sched module is intended for exactly this:
import sched
import time

scheduler = sched.scheduler(time.time, time.sleep)

def schedule_it(interval, duration, func, *args):
    # interval is the time between events, e.g. 1/30 for 30 events per second
    no_of_events = int(duration / interval)
    priority = 1  # not used here; lets you order events scheduled for the same time
    for i in range(no_of_events):
        delay = i * interval
        scheduler.enter(delay, priority, func, args)

def printer(x):
    print(x)

# execute printer 30 times a second for 60 seconds
schedule_it(1/30, 60, printer, 'hello')
scheduler.run()
For a threaded environment, the use of sched.scheduler can be replaced by threading.Timer:
import threading

def schedule_it(interval, duration, func, *args, **kwargs):
    no_of_events = int(duration / interval)
    for i in range(no_of_events):
        delay = i * interval
        threading.Timer(delay, func, args=args, kwargs=kwargs).start()

def printer(x):
    print(x)

# execute printer every 5 seconds over a 10-second window
schedule_it(5, 10, printer, 'hello')
Try using threading.Timer:
from threading import Timer

def hello():
    print("hello, world")

t = Timer(30.0, hello)
t.start()  # after 30 seconds, "hello, world" will be printed
You can use time.time() to do what you want:
import time

def your_function():
    # do something...
    pass

while True:
    start = time.time()  # current time in seconds since Jan 1, 1970 (on Unix)
    your_function()
    while True:
        current_time = time.time()
        if current_time - start >= 1.0/30.0:
            break
This will make sure that the delay between calls of your_function is very close to 1/30 of a second, even if your_function takes some time to run.
There is another way: using Python's built-in scheduling module, sched. I have never used it, so I can't help you there, but have a look at it.
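As an alternative to the busy-wait loop above (a sketch, not from the original answers), time.sleep() with drift correction keeps the rate steady without burning CPU:

import time

def run_at_rate(func, rate, duration):
    # Call func `rate` times per second for `duration` seconds,
    # sleeping between calls instead of busy-waiting.
    interval = 1.0 / rate
    n_calls = int(rate * duration)
    start = time.perf_counter()
    for i in range(n_calls):
        func()
        # Sleep until the next scheduled slot; measuring against the fixed
        # start time prevents drift from accumulating across iterations.
        next_slot = start + (i + 1) * interval
        remaining = next_slot - time.perf_counter()
        if remaining > 0:
            time.sleep(remaining)

run_at_rate(lambda: print("hello"), rate=30, duration=60)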
After spending some time on it, I discovered how to do it: I used multiprocessing in Python to achieve it.
Here's my solution:
#!/usr/bin/env python
from multiprocessing import Process
import os
import time
import datetime

def sleeper(name, seconds):
    time.sleep(seconds)
    print("PNAME:- %s" % name)

if __name__ == '__main__':
    pros = {}
    processes = []
    i = 0
    time2 = 0
    time1 = datetime.datetime.now()
    for sec in range(5):
        flag = 0
        while flag != 1:
            time2 = datetime.datetime.now()
            if (time2 - time1).seconds == 1:
                time1 = time2
                flag = 1
                print("Executing Per second")
                for no in range(5):
                    i += 1
                    pros[i] = Process(target=sleeper, args=("Thread-%d" % i, 1))
                j = i - 5
                for no in range(5):
                    j += 1
                    pros[j].start()
                j = i - 5
                for no in range(5):
                    j += 1
                    processes.append(pros[j])
    for p in processes:
        p.join()

timing a python program with threads

I have the following block of code that is part of a larger program. I am trying to get it to print the execution time once all of the threads have finished, but I can't seem to get it to work. Any ideas?
import time
import csv
import threading
import urllib.request

def openSP500file():
    SP500 = csv.reader(open(r'C:\Users\test\Desktop\SP500.csv', 'r'), delimiter=',')
    for x in SP500:
        indStk = x[0]
        t1 = StockData(indStk)
        t1.start()
    if not t1.is_alive():
        print(time.clock()-start_time, 'seconds')
    else:
        pass

def main():
    openSP500file()

if __name__ == '__main__':
    start_time = time.clock()
    main()
Thanks!
You aren't waiting for all the threads to finish (only the last one created). Perhaps something like this in your thread-spawning loop?
threads = []
for x in SP500:
    t1 = StockData(x[0])
    t1.start()
    threads.append(t1)

for t in threads:
    t.join()

# ... print running time
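A self-contained sketch of the same pattern (with placeholder work standing in for StockData, and time.perf_counter() since time.clock() was removed in Python 3.8):

import time
import threading

def fetch(symbol):
    # hypothetical placeholder for the real StockData work
    time.sleep(0.1)

if __name__ == '__main__':
    start_time = time.perf_counter()
    threads = []
    for symbol in ['AAPL', 'MSFT', 'GOOG']:
        t = threading.Thread(target=fetch, args=(symbol,))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()  # wait for every thread, not just the last one
    print(time.perf_counter() - start_time, 'seconds')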
