Python multiprocessing is not using multiple cores

After reading the documentation (https://docs.python.org/2/library/multiprocessing.html),
I decided to write some CPU-intensive code and compare multiprocessing with serial computation.
First of all, if this library really uses multiple processes, why do I see only one python.exe process?
Secondly, why does the serial computation take 12 seconds while the multiprocessed one takes 22 seconds?
serial code:
from datetime import datetime

def calc_fib(ind):
    fb = 1
    if ind >= 3:
        prev = 1
        i = 2
        while i < ind:
            prev_tmp = fb
            fb += prev
            prev = prev_tmp
            i += 1
    return fb

def long_calc_fib(ind):
    val = 0
    for j in range(500):
        val = calc_fib(ind)
    return val

if __name__ == "__main__":
    t1 = datetime.now()
    for i in range(10):
        tmp = long_calc_fib(10000)
    t2 = datetime.now()
    print str(t2 - t1)
multiprocessing pool code:
from datetime import datetime
from multiprocessing.pool import ThreadPool

def calc_fib(ind):
    fb = 1
    if ind >= 3:
        prev = 1
        i = 2
        while i < ind:
            prev_tmp = fb
            fb += prev
            prev = prev_tmp
            i += 1
    return fb

def long_calc_fib(ind):
    val = 0
    for j in range(500):
        val = calc_fib(ind)
    return val

if __name__ == "__main__":
    t1 = datetime.now()
    pool = ThreadPool(processes=10)
    async_results = []
    for i in range(10):
        async_results.append(pool.apply_async(long_calc_fib, (10000,)))
    for res in async_results:
        tmp = res.get()
    t2 = datetime.now()
    print str(t2 - t1)

My mistake.
I should have used Pool instead of ThreadPool.
By changing ThreadPool to Pool, I reduced the time to 3 seconds.
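For reference, a minimal sketch of the corrected driver (assuming the same calc_fib and long_calc_fib as above); only the import and the pool construction change:
from datetime import datetime
from multiprocessing import Pool  # a process pool, unlike ThreadPool

if __name__ == "__main__":
    t1 = datetime.now()
    pool = Pool(processes=10)  # one worker process per task here
    async_results = [pool.apply_async(long_calc_fib, (10000,)) for i in range(10)]
    for res in async_results:
        tmp = res.get()
    t2 = datetime.now()
    print str(t2 - t1)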

Related

Which is the Faster Counter in Python?

I wanted to know which of these three counters is faster:
1)
from threading import Thread

def c(output, i):
    if i in output:
        output[i] += 1
    else:
        output[i] = 1

def build(itera):
    output = {}
    for i in itera:
        Thread(target=c, args=(output, i)).start()
    return output
2)
def build(itera):
    output = {}
    for i in itera:
        if i in output:
            output[i] += 1
        else:
            output[i] = 1
    return output
3)
from collections import Counter
Counter("12342")
And if there is any code that does the same thing but is faster than all three blocks, please tell me.
Use this to figure out which method takes the least amount of time:
import time

start = time.time()
# ***
# the code you want to test goes here
# ***
end = time.time()
time_taken = round(end - start, 2)
print(time_taken)
The output is in seconds.
Using timeit:
import timeit

def do_something():
    pass

# pass the callable itself; wrapping it in an f-string would call the
# function once and then time evaluating the resulting string instead
print(timeit.timeit(do_something, number=1000000))
Add this into your code, and see which runs quicker:
from datetime import datetime

start = datetime.now()
# code here
end = datetime.now()
total = end - start
print(total.total_seconds())
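Putting those together, a hedged sketch of the actual comparison. The thread-per-item version is left out because starting one thread per character is dominated by thread start-up cost (and the unsynchronised dict update is racy); "12342" * 1000 is an arbitrary test input, not from the original post:
import timeit
from collections import Counter

data = "12342" * 1000

def build(itera):
    # option 2) from the question: a plain dict loop
    output = {}
    for i in itera:
        if i in output:
            output[i] += 1
        else:
            output[i] = 1
    return output

print("dict loop:", timeit.timeit(lambda: build(data), number=1000))
print("Counter:  ", timeit.timeit(lambda: Counter(data), number=1000))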

Implementing Numpy into nth factorial of n

I've tried a few ways of working numpy into my program below, but every attempt drastically decreases performance. Is this expected with my use case, or is there a way to implement it with a performance increase instead of a decrease?
import multiprocessing
import argparse
from datetime import datetime
from math import log10

parser = argparse.ArgumentParser(
    formatter_class=argparse.HelpFormatter,
    description="Calcs n factorial",
    usage=""
)
parser.add_argument("-n", "--number", type=int, default=2)
args = parser.parse_args()

def getlog(send_end, i, threads, num, n, inc):
    begin = datetime.now()
    start = num-inc*i
    end = num-inc*(i+1) if i < threads-1 else 0
    output = sum(map(log10, range(start, end, -n)))
    send_end.send(output)
    final = datetime.now()
    duration = final-begin
    print("{},{},{},{}".format(i, duration, start, end))

def main():
    n = args.number
    num = int('1'*n)
    threads = multiprocessing.cpu_count() if num/multiprocessing.cpu_count() > multiprocessing.cpu_count() else 1
    inc = int(num/threads)
    inc -= inc%n
    jobs = []
    pipe_list = []
    for i in range(threads):
        recv_end, send_end = multiprocessing.Pipe(False)
        p = multiprocessing.Process(target=getlog, args=(send_end, i, threads, num, n, inc))
        jobs.append(p)
        pipe_list.append(recv_end)
        p.start()
    for proc in jobs:
        proc.join()
    e = sum([output.recv() for output in pipe_list])
    print('%.2fe%d' % (10**(e % 1), e // 1))

if __name__ == '__main__':
    start = datetime.now()
    main()
    end = datetime.now()
    print(end-start)
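For comparison, a hedged numpy sketch of the inner log-sum (getlog_numpy is a hypothetical helper mirroring getlog's start, end and n arguments, not code from the post). np.arange materialises the whole range in memory before np.log10 runs, so for very large ranges the allocation and memory traffic can outweigh the vectorised math, which is a plausible source of the slowdown:
import numpy as np

def getlog_numpy(start, end, n):
    # builds the full descending range as float64, then one vectorised log10
    return np.log10(np.arange(start, end, -n, dtype=np.float64)).sum()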

Retrieve values from multiprocessing library

I am trying to use the multiprocessing library to compare the performance of my processor on 1 core vs. 2 cores.
To do so, I calculate a large product using 1 loop, 2 loops on 1 core, and 2 loops on 2 cores (1 core per loop). The problem is that the values of D1.result and D2.result are 0, although they are expected to be the product of each half-loop.
The code is the following:
import random
import time
from multiprocessing import Process as Task, freeze_support

N = 10 ** 3
l = [random.randint(2 ** 999, 2 ** 1000 - 1) for x in range(N)]

# ---------------------------------------------------------------
class Loop:
    def __init__(self):
        self.result = 0

    def boucle(self, start, end):
        self.result = l[start]
        for v in l[start+1:end]:
            self.result = self.result*v

# ---------------------------------------------------------------
if __name__ == "__main__":
    print("1 Loop without multiprocessing")
    A = Loop()
    sta = time.time()
    ra = A.boucle(0, N)
    end = time.time()
    print("--> Time :", end - sta)

    # ----------------------------------------------------------------------
    print("2 Loops without multiprocessing")
    B1 = Loop()
    B2 = Loop()
    sta = time.time()
    rb1 = B1.boucle(0, N//2)
    rb2 = B2.boucle(N//2, N)
    rb = B1.result*B2.result
    end = time.time()
    print("--> Time :", end - sta)
    if rb - A.result == 0:
        check = "OK"
    else:
        check = "fail"
    print("--> Check :", check)

    # ----------------------------------------------------------------------
    print("2 Loops with multiprocessing")
    freeze_support()
    D1 = Loop()
    D2 = Loop()
    v1 = Task(target=D1.boucle, args=(0, N//2))
    v2 = Task(target=D2.boucle, args=(N//2, N))
    sta = time.time()
    v1.start()
    v2.start()
    v1.join()
    v2.join()
    rd = D1.result*D2.result
    end = time.time()
    print("D1", D1.result)
    print("D2", D2.result)
    print("--> Time :", end - sta)
    if rd - A.result == 0:
        check = "OK"
    else:
        check = "fail"
    print("--> Check :", check)
The result of this code is :
1 Loop without multiprocessing
--> Time : 0.5025153160095215
2 Loops without multiprocessing
--> Time : 0.283463716506958
--> Check : OK
2 Loops with multiprocessing
D1 0
D2 0
--> Time : 0.2579989433288574
--> Check : fail
Process finished with exit code 0
Why are D1 and D2 0, and not the result of each loop?
Thank you!
The issue with this code shows up when D1 and D2 are displayed:
with multiprocessing, each task is executed in a forked process, and that process gets a copy of the data.
In each forked process the value is properly computed, but it is never sent back to the main process.
To work around this you can:
Use shared memory to store the result; but in that case you are limited to C types, and your numbers do not fit in 64 bits (the native C integer size), so this is not a good solution.
Use a pool of processes; data is then shared via queues and you can handle real Python types.
This last option requires that the "boucle" function return the result.
Here is the code:
import random
import time
from multiprocessing import Process as Task, freeze_support, Pool

N = 10 ** 3
l = [random.randint(2 ** 999, 2 ** 1000 - 1) for x in range(N)]

# ---------------------------------------------------------------
class Loop:
    def __init__(self):
        self.result = 0

    def boucle(self, start, end):
        self.result = l[start]
        for v in l[start + 1:end]:
            self.result = self.result * v
        return self.result

# ---------------------------------------------------------------
if __name__ == "__main__":
    print("1 Loop without multiprocessing")
    A = Loop()
    sta = time.time()
    ra = A.boucle(0, N)
    end = time.time()
    print("--> Time :", end - sta)

    # ----------------------------------------------------------------------
    print("2 Loops without multiprocessing")
    B1 = Loop()
    B2 = Loop()
    sta = time.time()
    rb1 = B1.boucle(0, N // 2)
    rb2 = B2.boucle(N // 2, N)
    rb = B1.result * B2.result
    end = time.time()
    print("--> Time :", end - sta)
    if rb - A.result == 0:
        check = "OK"
    else:
        check = "fail"
    print("--> Check :", check)

    # ----------------------------------------------------------------------
    print("2 Loops with multiprocessing")
    freeze_support()
    D1 = Loop()
    D2 = Loop()
    pool = Pool(processes=2)
    with pool:
        sta = time.time()
        rb1 = pool.apply_async(D1.boucle, (0, N // 2))
        rb2 = pool.apply_async(D2.boucle, (N // 2, N))
        v1 = rb1.get()
        v2 = rb2.get()
        rd = v1 * v2
        end = time.time()
    print("D1", D1.result)  # still 0: the worker only updated its own copy
    print("D2", D2.result)
    print("--> Time :", end - sta)
    if rd - A.result == 0:
        check = "OK"
    else:
        check = "fail"
    print("--> Check :", check)
And the result:
1 Loop without multiprocessing
--> Time : 0.3473360538482666
2 Loops without multiprocessing
--> Time : 0.18696999549865723
--> Check : OK
2 Loops with multiprocessing
D1 0
D2 0
--> Time : 0.1116642951965332
--> Check : OK
You can also use map with the pool to get the values back, but I have not tried it in this case because you only call 2 functions. Pool workers also pick up tasks in batches (see the maxtasksperchild argument), so it is possible that a single worker would have taken both functions for itself. A sketch of that variant follows below.
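For completeness, a hedged Python 3 sketch of the map-based variant using starmap, which unpacks the (start, end) tuples. boucle is moved to module level so it can be pickled, and random is seeded so that every process builds the same list even under the spawn start method:
import random
import time
from multiprocessing import Pool

random.seed(0)  # children re-import this module under spawn; keep l identical
N = 10 ** 3
l = [random.randint(2 ** 999, 2 ** 1000 - 1) for x in range(N)]

def boucle(start, end):
    result = l[start]
    for v in l[start + 1:end]:
        result *= v
    return result

if __name__ == "__main__":
    sta = time.time()
    with Pool(processes=2) as pool:
        v1, v2 = pool.starmap(boucle, [(0, N // 2), (N // 2, N)])
    rd = v1 * v2
    print("--> Time :", time.time() - sta)
    print("--> Check :", "OK" if rd == boucle(0, N) else "fail")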

Python: How Do I Refresh the psutil Result?

I'm trying to make a Python script that prints the top 5 processes by CPU/memory usage every minute. However, the CPU result doesn't seem to change when the loop repeats.
How can I get a new set of CPU measurements on each iteration?
My code is below.
Thank you for your help!
import psutil
import time
from functools import cmp_to_key

def log(line):
    print(line)
    with open("log.txt", "a") as f:
        f.write("{}\n".format(line))

def cmpCpu(a, b):
    a = a['cpu']
    b = b['cpu']
    if a > b:
        return -1
    elif a == b:
        return 0
    else:
        return 1

def cmpMemory(a, b):
    a = a['memory']
    b = b['memory']
    if a > b:
        return -1
    elif a == b:
        return 0
    else:
        return 1

def getInfo(pid):
    p = psutil.Process(pid)
    name = p.name()
    cpu = p.cpu_percent()
    memory = int(p.memory_info().rss/1024/1024)
    return {'name': name, 'cpu': cpu, 'memory': memory}

while True:
    localtime = time.localtime(time.time())
    timestamp = str(localtime.tm_hour)+":"+str(localtime.tm_min)
    log(timestamp)
    processes = []
    for i in psutil.pids():
        processes.append(getInfo(i))
    # Sort by cpu usage
    processes.sort(key=cmp_to_key(cmpCpu))
    for i in range(5):
        info = processes[i]
        info = info['name']+", "+str(info['cpu'])+"%"
        log(info)
    # Sort by memory usage
    processes.sort(key=cmp_to_key(cmpMemory))
    for i in range(5):
        info = processes[i]
        info = info['name']+", "+str(info['memory'])+"MB"
        log(info)
    time.sleep(60)
It seems like psutil.process_iter is the answer: it caches the Process objects between calls, so each cpu_percent() is measured against the previous call instead of coming from a freshly created Process (whose first call always reports 0.0). The code below works.
import psutil
import time
from functools import cmp_to_key

def log(line):
    print(line)
    with open("log.txt", "a") as f:
        f.write("{}\n".format(line))

def cmpCpu(a, b):
    a = a['cpu']
    b = b['cpu']
    if a > b:
        return -1
    elif a == b:
        return 0
    else:
        return 1

def cmpMemory(a, b):
    a = a['memory']
    b = b['memory']
    if a > b:
        return -1
    elif a == b:
        return 0
    else:
        return 1

while True:
    localtime = time.localtime(time.time())
    timestamp = str(localtime.tm_hour)+":"+str(localtime.tm_min)
    log(timestamp)
    # Collect information for each process
    processes = []
    for proc in psutil.process_iter(attrs=['name', 'cpu_percent', 'memory_info']):
        processes.append({'name': proc.info['name'], 'cpu': proc.info['cpu_percent'], 'memory': int(proc.info['memory_info'].rss/1024/1024)})
    # Sort by cpu usage
    log("CPU:")
    processes.sort(key=cmp_to_key(cmpCpu))
    for i in range(5):
        info = processes[i]
        info = info['name']+", "+str(info['cpu'])+"%"
        log(info)
    # Sort by memory usage
    log("Memory:")
    processes.sort(key=cmp_to_key(cmpMemory))
    for i in range(5):
        info = processes[i]
        info = info['name']+", "+str(info['memory'])+"MB"
        log(info)
    time.sleep(60)
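As a side note, the cmp_to_key comparators could be replaced by plain key functions; a minimal sketch, assuming the same list-of-dicts shape as above (top5 is a hypothetical helper, not part of the original answer):
def top5(processes, field):
    # the five entries with the largest value for the given field
    return sorted(processes, key=lambda p: p[field], reverse=True)[:5]

for info in top5(processes, 'cpu'):
    log("{}, {}%".format(info['name'], info['cpu']))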

Threading in Python takes longer instead of making it faster?

I wrote 3 different pieces of code to compare having threads vs. not having threads, basically measuring how much time I save by using threading, and the results didn't make any sense.
Here is my code:
import time

def Function():
    global x
    x = 0
    while x < 300000000:
        x += 1
    print x

e1 = time.clock()
E1 = time.time()
Function()
e2 = time.clock()
E2 = time.time()
print e2 - e1
print E2 - E1
When I ran this, I got this as output:
26.6358742929
26.6440000534
Then I wrote another version as shown below, splitting the count to 300 million into three counts of 100 million each:
import time

def Function():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

def Function2():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

def Function3():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

e1 = time.clock()
E1 = time.time()
Function()
Function2()
Function3()
e2 = time.clock()
E2 = time.time()
print e2 - e1
print E2 - E1
The output of this version was:
26.0577638729
26.0629999638
and lastly I created 3 threads and ran each function on its own thread:
import time
import threading

e1 = time.clock()
E1 = time.time()

def Function1():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

def Function2():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

def Function3():
    global x
    x = 0
    while x < 100000000:
        x += 1
    print x

new_thread1 = threading.Thread(target = Function1() , args = ())
new_thread2 = threading.Thread(target = Function2(), args = ())
new_thread3 = threading.Thread(target = Function3(), args = ())
e1 = time.clock()
E1 = time.time()
new_thread1.start()
new_thread2.start()
new_thread3.start()
e2 = time.clock()
E2 = time.time()
print e2 - e1
print E2 - E1
The output of this one was:
0.000601416222253
0.0
These numbers make no sense to me. I'm just trying to measure how much time threading saves me. I've looked at the documentation, and using time.time and time.clock made sense to me, but it doesn't make sense here. Also, the actual time for the 1st and 2nd snippets was about 10 seconds, and for the 3rd about 5.
You are calling it wrong...
new_thread1 = threading.Thread(target = Function1 , args = ())
Note that you should not CALL the function when you create the thread.
Those timings really mean nothing; they are both essentially zero, because all you are timing is 3 calls to start that return instantly.
Note that to get the output you will need to wait for each thread to finish (since your current code does not do this).
EDIT FOR MORE INFO
With threading you are locked by the GIL to one Python instruction at a time... typically this is not a problem, since you are usually waiting on disk IO. In your example code, however, it is 100% computation, so threading really doesn't improve your time. Multiprocessing may, as demonstrated below:
import time
import threading
import multiprocessing

def fn():
    '''since all 3 functions were identical you can just use one ...'''
    x = 0
    while x < 100000000:
        x += 1

def TEST_THREADS():
    new_thread1 = threading.Thread(target=fn, args=())
    new_thread2 = threading.Thread(target=fn, args=())
    new_thread3 = threading.Thread(target=fn, args=())
    new_thread1.start()
    new_thread2.start()
    new_thread3.start()
    new_thread1.join()
    new_thread2.join()
    new_thread3.join()

def TEST_NORMAL():
    fn()
    fn()
    fn()

def TEST_MULTIPROCESSING():
    new_thread1 = multiprocessing.Process(target=fn, args=())
    new_thread2 = multiprocessing.Process(target=fn, args=())
    new_thread3 = multiprocessing.Process(target=fn, args=())
    new_thread1.start()
    new_thread2.start()
    new_thread3.start()
    new_thread1.join()
    new_thread2.join()
    new_thread3.join()

if __name__ == "__main__":
    '''It is very important to use the __name__ == "__main__" guard with threads and multiprocessing'''
    import timeit
    print "Time to Run 1x: %0.2fs" % (timeit.timeit(fn, number=1),)
    print "NORMAL: %0.2fs" % (timeit.timeit(TEST_NORMAL, number=1),)
    print "Threaded: %0.2fs" % (timeit.timeit(TEST_THREADS, number=1),)
    print "Multiprocessing: %0.2fs" % (timeit.timeit(TEST_MULTIPROCESSING, number=1),)
I get the following output
Time to Run 1x: 3.71181102665
NORMAL:11.0136830117
Threaded: 23.392143814
Multiprocessing: 3.80878260515
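Not from the original answer, but as a point of reference: a hedged Python 3 sketch of the same comparison using concurrent.futures, which puts thread and process pools behind a single interface:
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

def fn():
    x = 0
    while x < 100000000:
        x += 1

if __name__ == "__main__":
    for Executor in (ThreadPoolExecutor, ProcessPoolExecutor):
        start = time.time()
        with Executor(max_workers=3) as ex:
            futures = [ex.submit(fn) for _ in range(3)]
            for f in futures:
                f.result()  # block until each task finishes
        print(Executor.__name__, "%0.2fs" % (time.time() - start))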
