Progress bar when using Numba (tqdm is not working) - Python

I have been trying to run some code using Numba, and I have added a print statement to see its progress:
from numba import jit, njit, prange
import numpy as np

# minimal reproducible example
a = int(1e5)
ar = np.random.rand(a)
at = np.random.rand(a)
an = np.random.rand(a)

tau = 1        # time lag
window = 6000

@njit(parallel=True)
def func_DB(ar, at, an):
    DBtotal = np.zeros(len(an) - tau)
    k = 0
    for i in prange(0, len(an) - tau, 1):
        DBtotal[i] = np.sqrt((ar[i + tau] - ar[i])**2
                             + (at[i + tau] - at[i])**2
                             + (an[i + tau] - an[i])**2)
        # To print the progress
        if i % 1e5 == 0:
            k += 1
            print(k*1e5/len(DBtotal))
    return DBtotal
@njit(parallel=True)
def func_PVI(DBtotal, tau, window):
    PVI = np.zeros(len(DBtotal))
    k = 0
    for i in prange(int(window/2), len(DBtotal) - int(window/2)):
        PVI[i] = DBtotal[i]/np.sqrt(np.mean(DBtotal[i - int(window/2):i + int(window/2)]**2))
        # To print the progress
        if i % 1e5 == 0:
            k += 1
            print(k*1e5/len(DBtotal))
    return PVI

DBtotal = func_DB(ar, at, an)
PVI = func_PVI(DBtotal, tau, window)
However, while the code is running I don't get what I expected (i.e. values that go from 0 to 1 as the code progresses). Instead, I get this:
0.009479390005044932
0.009479390005044932
0.009479390005044932
0.009479390005044932
0.009479390005044932
0.018958780010089864
Could someone suggest a way to see the progress of the code?
Also, any suggestions to make the code more efficient would be much appreciated!

I broke the function down into pieces and wrapped a tqdm around it.
Instead of
@jit(nopython=True)
def dothings(A, rows, cols):
    for r in range(rows):
        for c in range(cols):
            ...  # stuff

dothings(data, data.shape[0], data.shape[1])
I used
rows = data.shape[0]

@jit(nopython=True)
def dothings(A, cols, r):
    # for r in range(rows):  # the outer loop now lives outside the jitted function
    for c in range(cols):
        ...  # stuff

for r in tqdm.tqdm(range(rows), total=rows):
    dothings(data, data.shape[1], r)
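Applied to the question's func_DB, the same idea might look roughly like this. This is only a sketch: func_DB_with_progress, db_chunk and the chunk size of 10,000 are made-up names and values for illustration.

from numba import njit
import numpy as np
from tqdm import tqdm

@njit
def db_chunk(ar, at, an, DBtotal, start, stop, tau):
    # fill one slice of DBtotal; the plain-Python loop around it reports progress
    for i in range(start, stop):
        DBtotal[i] = np.sqrt((ar[i + tau] - ar[i])**2
                             + (at[i + tau] - at[i])**2
                             + (an[i + tau] - an[i])**2)

def func_DB_with_progress(ar, at, an, tau, chunk=10_000):
    n = len(an) - tau
    DBtotal = np.zeros(n)
    for start in tqdm(range(0, n, chunk)):
        db_chunk(ar, at, an, DBtotal, start, min(start + chunk, n), tau)
    return DBtotal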

Try this:

import time
from numba import njit, prange, objmode

@njit(parallel=True)
def harmonic_load_flow_func_time_inside():
    with objmode(time1='f8'):
        time1 = time.perf_counter()
    calc = 0
    for x in prange(1000000):
        calc += x
    with objmode():
        print('time: {}'.format(time.perf_counter() - time1), end='\r')
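Building on that, here is a sketch of how the question's loop could report its own progress and elapsed time via objmode. It uses a serial njit function (parallel=True is dropped, since the point here is only the progress print), and the 10,000-iteration reporting interval and the name func_DB_progress are arbitrary choices.

import time
import numpy as np
from numba import njit, objmode

@njit
def func_DB_progress(ar, at, an, tau):
    n = len(an) - tau
    DBtotal = np.zeros(n)
    with objmode(t0='f8'):
        t0 = time.perf_counter()
    for i in range(n):
        DBtotal[i] = np.sqrt((ar[i + tau] - ar[i])**2
                             + (at[i + tau] - at[i])**2
                             + (an[i + tau] - an[i])**2)
        if i % 10_000 == 0 and i > 0:
            with objmode():
                # report fraction done and elapsed wall time from object mode
                print('{:.1%} done, {:.2f} s elapsed'.format(i / n, time.perf_counter() - t0))
    return DBtotal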

Related

I want to convert the following MATLAB code into Python. Is this the correct way?

How can I correctly translate the parts [AIF,j]=get_AIF_j(InterpFact) and [~,j_index] = min(InterpFact-AIF_vect)? And what about the remaining code? Thanks in advance.
%Matlab code
InterpFact = (fs_h/2/2)/(fd_max);
[AIF,j] = get_AIF_j(InterpFact);

function [AIF,j] = get_AIF_j(InterpFact)
    j_vect = 1:10;
    AIF_vect = floor(j_vect*InterpFact)./j_vect;
    [~,j_index] = min(InterpFact-AIF_vect);
    j = j_vect(j_index);
    AIF = AIF_vect(j_index);
end
#Python code
InterpFact = (fs_h/2/2)/(fd_max)
[AIF,j] = get_AIF_j(InterpFact)

def get_AIF_j(InterpFact):
    j_vect = np.arange(1, 11)
    AIF_vect = np.floor(j_vect*InterpFact)/j_vect
    [~,j_index] = min(InterpFact-AIF_vect)
    j = j_vect[j_index]
    AIF = AIF_vect[j_index]
    return AIF, j
This MATLAB:
[~,j_index] = min(InterpFact-AIF_vect);
would be translated to Python as:
j_index = np.argmin(InterpFact-AIF_vect)
Also, …/(fd_max) can only be translated the way you did if fd_max is a scalar. A division with a matrix in MATLAB solves a system of linear equations.
I strongly recommend that you run the two pieces of code side by side with the same input, to verify that they do the same thing. You cannot go by guesses as to what a piece of code does.
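Putting those points together, a minimal sketch of the translated function (assuming fs_h and fd_max are scalars):

import numpy as np

def get_AIF_j(InterpFact):
    j_vect = np.arange(1, 11)
    AIF_vect = np.floor(j_vect * InterpFact) / j_vect
    # MATLAB's [~, j_index] = min(...) returns the index of the minimum
    j_index = np.argmin(InterpFact - AIF_vect)
    return AIF_vect[j_index], j_vect[j_index]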
Try this to see if it delivers what it should (I am not sure here, as I am not fluent in MATLAB):
#Python code
import numpy as np

def get_AIF_j(InterpFact):
    j_vect = np.arange(1, 11)
    AIF_vect = np.floor(j_vect*InterpFact)/j_vect
    j_index = int(min(InterpFact-AIF_vect))
    print(j_index)
    j = j_vect[j_index]
    AIF = AIF_vect[j_index]
    return AIF, j

fs_h = 24; fd_max = 1
InterpFact = (fs_h/2/2)/(fd_max)
AIF, j = get_AIF_j(InterpFact)
print(AIF, j)
gives:
0
6.0 1

Why is the miscount due to a race condition a multiple of n / (CPU cores)?

I was writing a code example to demonstrate a race condition with numba.jit and parallel=True.
import numpy as np
import pandas as pd
from numba import jit, prange
from collections import Counter
import fractions

n = 10**6
m = 10**6

@jit(nopython=True, parallel=True)
def test():
    lst = [0]
    for i in prange(n):
        lst[0] += 1
    return lst

error = Counter([str(fractions.Fraction(test()[0], n)) for _ in range(m)])
df = pd.DataFrame(error.items())

def func(x, y='1'): return int(x)/int(y)

df[2] = df[0].apply(lambda _str: func(*_str.split('/')))
df = df.sort_values(2)
ax = df.plot.bar(x=0, y=1)
ax.set_xlabel('ratio count/maximal_count')
ax.get_legend().remove()
What surprised me was that the miscounts due to the race condition are multiples of n divided by the number of CPU cores; the bar chart produced by the code above shows the distribution.
I basically understand what's going on:
lst[0] += 1
is short for
buffer = lst[0]
lst[0] = buffer+1
And if another thread does the same thing at the same time, one of them may overwrite the value at the wrong moment.
I have two questions though:
Can somebody confirm that it is roughly distributed like this?
And why is it distributed like this?
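As a side note (not an answer to the distribution question), one way to avoid the miscount altogether is to accumulate into a plain scalar: Numba treats in-place updates of a scalar inside prange as a parallel reduction, so the count comes out exact. A minimal sketch:

from numba import njit, prange

n = 10**6

@njit(parallel=True)
def test_reduction():
    total = 0
    for i in prange(n):
        total += 1  # scalar accumulation is recognised as a reduction
    return total

print(test_reduction())  # always equals n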

Trying to optimize my complex function to execute in polynomial time

I have this code that generates all 2**40 possible binary numbers, and from these binary numbers I try to get all the vectors that match my objective function's conditions, which are:
1- Each vector in the matrix must contain exactly 20 ones.
2- The sum s, accumulated as s = s + (index of the one + 1) * (rank of the one), must equal 4970.
I wrote this code, but it would take a very long time, maybe months, to produce results. Now I am looking for an alternative approach or an optimization of this code, if possible.
import time
from multiprocessing import Process
from multiprocessing import Pool
import numpy as np
import itertools

CC = 20

# test whether the vector contains exactly 20 ones
def test1numebers(v, x=1, x_l=CC):
    c = 0
    for i in range(len(v)):
        if v[i] == x:
            c += 1
    if c == x_l:
        return True
    else:
        return False

# s = s + (index+1) * the rank of the one
def objectif_function(v, x=1):
    s = 0
    for i in range(len(v)):
        if v[i] == x:
            s = s + ((i+1)*nthi(v, i))
    return s

# calculate the rank of the one at index i in a vector
def nthi(v, i):
    c = 0
    for j in range(0, i+1):
        if v[j] == 1:
            c += 1
    return c

# generate all 2**40 possible binary numbers
def generateMatrix(N):
    l = itertools.product([0, 1], repeat=N)
    return l
# count the valid vectors that match the objective function
def main_algo(N=40, S=4970):
    m = generateMatrix(N)
    c = 0
    ii = 0
    t_start = time.time()
    for i in m:
        ii += 1
        print("\n count:", ii)
        xx = i
        if test1numebers(xx):
            if objectif_function(xx) == S:
                c += 1
                print('found one')
                print('\n', xx, '\n')
        if ii >= 1000000:
            break
    t_end = time.time()
    print('time taken for 10**6 is: ', t_end - t_start)
    print(c)

if __name__ == '__main__':
    '''p = Process(target=main_algo, args=(40,4970,))
    p.start()
    p.join()'''
    p = Pool(150)
    print(p.map(main_algo, [40, 4970]))
You could make a lot of improvements in readability and make your code more Pythonic, but above all I recommend that you use NumPy, which is the fastest way of working with matrices.
Avoid processing matrices element by element in Python loops. With NumPy you can do those calculations faster, on all the data at once.
NumPy also generates matrices very quickly; I think you could create a random 0/1 matrix in fewer lines of code and much faster.
I also recommend installing OpenBLAS, ATLAS and LAPACK, which make linear algebra calculations considerably faster.
I hope this helps you.
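As an illustration of shrinking the search space, here is a sketch that enumerates only the vectors with exactly 20 ones by choosing the positions of the ones directly, which gives C(40, 20) candidates instead of 2**40. The function name count_matches and the limit parameter are invented for this example.

import itertools

def count_matches(N=40, ones=20, S=4970, limit=1_000_000):
    c = 0
    for k, positions in enumerate(itertools.combinations(range(N), ones), start=1):
        # rank of a one = its 1-based order among the ones; its index contributes (position + 1)
        s = sum((pos + 1) * (rank + 1) for rank, pos in enumerate(positions))
        if s == S:
            c += 1
        if k >= limit:
            break
    return c

print(count_matches())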

Code runs much faster in C than in NumPy

I wrote physics simulation code in Python using NumPy and then rewrote it in C++. In C++ it takes only 0.5 seconds, while in Python it takes around 40 s. Can someone please help me find what I did horribly wrong?
import numpy as np

def myFunc(i):
    uH = np.copy(u)
    for j in range(1, xmax-1):
        u[i][j] = a*uH[i][j-1]+(1-2*a)*uH[i][j]+a*uH[i][j+1]
    u[i][0] = u[i][0]/b
    for x in range(1, xmax):
        u[i][x] = (u[i][x]+a*u[i][x-1])/(b+a*c[x-1])
    for x in range(xmax-2, -1, -1):
        u[i][x] = u[i][x]-c[x]*u[i][x+1]

xmax = 101
tmax = 2000
# All other variables are defined here, but I removed that for visibility
uH = np.zeros((xmax, xmax))
u = np.zeros((xmax, xmax))
c = np.full(xmax, -a)
uH[50][50] = 10000

for t in range(1, tmax):
    if t % 2 == 0:
        for i in range(0, xmax):
            myFunc(i)
    else:
        for i in range(0, xmax):
            myFunc(i)
In case someone wants to run it, here is the whole code: http://pastebin.com/20ZSpBqQ
EDIT: all variables are defined in the complete code, which can be found on Pastebin. Sorry for the confusion; I thought removing the clutter would make the code easier to understand.
Fundamentally, C is a compiled language while Python is an interpreted one: speed versus ease of use. NumPy can fill the gap, but you must avoid for-loops over individual items, which often takes some skill.
For example,
def block1():
    for i in range(xmax):
        for j in range(1, xmax-1):
            u[i][j] = a*uH[i][j-1]+(1-2*a)*uH[i][j]+a*uH[i][j+1]
is, in NumPy style:
def block2():
    u[:, 1:-1] += a*np.diff(u, 2)
which is shorter and faster (and easier to read and understand?):
In [37]: %timeit block1()
10 loops, best of 3: 25.8 ms per loop
In [38]: %timeit block2()
10000 loops, best of 3: 123 µs per loop
Finally, you can speed up NumPy code with just-in-time compilation, which Numba provides. Just change the beginning of your code like this:
import numba

@numba.jit
def myFunc(u, i):
    ...
and change the calls to myFunc(u, i) at the end of the script (u must be a parameter so the types can be determined automatically), and you will reach the same performance (0.4 s on my PC).
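Spelled out, the jitted function might look roughly like this. It is a sketch only; here a, b and c are also passed in as parameters so Numba can infer every type, which goes slightly beyond the minimal change described above.

import numba
import numpy as np

@numba.njit
def myFunc(u, c, a, b, i):
    xmax = u.shape[0]
    uH = u.copy()
    for j in range(1, xmax - 1):
        u[i, j] = a*uH[i, j-1] + (1 - 2*a)*uH[i, j] + a*uH[i, j+1]
    u[i, 0] = u[i, 0] / b
    for x in range(1, xmax):
        u[i, x] = (u[i, x] + a*u[i, x-1]) / (b + a*c[x-1])
    for x in range(xmax - 2, -1, -1):
        u[i, x] = u[i, x] - c[x]*u[i, x+1]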
When I ran your NumPy Python code it took four minutes; once I removed the NumPy code and replaced it with standard Python code, it took only one minute! (I have a not-so-fast computer.)
Here's that code:
#import numpy as np
def impl(i, row):
    if row:
        uH = [r[:] for r in u]  # copy the array 'u' row by row
        for j in range(1, xmax-1):
            u[i][j] = a*uH[i][j-1]+(1-2*a)*uH[i][j]+a*uH[i][j+1]
        u[i][0] = u[i][0]/b
        for x in range(1, xmax):
            u[i][x] = (u[i][x]+a*u[i][x-1])/(b+a*c[x-1])
        for x in range(xmax-2, -1, -1):
            u[i][x] = u[i][x]-c[x]*u[i][x+1]
    else:
        uH = [r[:] for r in u]  # copy the array 'u' row by row
        for j in range(1, xmax-1):
            u[j][i] = a*uH[j-1][i]+(1-2*a)*uH[j][i]+a*uH[j+1][i]
        u[0][i] = u[0][i]/b
        for y in range(1, xmax):
            u[y][i] = (u[y][i]+a*u[y-1][i])/(b+a*c[y-1])
        for y in range(xmax-2, -1, -1):
            u[y][i] = u[y][i]-c[y]*u[y+1][i]

# Init
xmax = 101
tmax = 2000
D = 0.5
l = 1
tSec = 0.1
uH = [[0.0]*xmax for _ in range(xmax)]  # np.zeros((xmax,xmax))
u = [[0.0]*xmax for _ in range(xmax)]   # np.zeros((xmax,xmax))
dx = l / xmax
dt = tSec / tmax
a = (D*dt)/(dx*dx)
b = 1+2*a
print("dx=="+str(dx))
print("dt=="+str(dt))
print(" a=="+str(a))
# coefficient c in the tridiagonal matrix
c = [-a]*xmax  # np.full(xmax,-a)
c[0] = c[0]/b
for i in range(1, xmax):
    c[i] = c[i]/(b+a*c[i-1])
uH[50][50] = 10000
u = uH
for t in range(1, tmax):
    if t % 2 == 0:
        for i in range(0, xmax):
            impl(i, False)
    else:
        for i in range(0, xmax):
            impl(i, True)
I believe this could be much faster if you had used NumPy the correct way rather than as a substitute for plain arrays; even so, simply not using NumPy arrays cut the time to a quarter of the original.

How to run a single function a number of times in Python

I tried to run a simple function n times using the code below:
import numpy as np
import pandas as pd

df = pd.DataFrame()

def repeat_fun(times, f, args):
    for i in range(times): f(args)

def f(x):
    g = np.random.normal(0, 1, 32)
    mm = np.random.normal(491.22, 128.23, 32)
    x = 491.22 + (0.557*(mm - 491.22)) + (g*128.23*(np.sqrt(1 - 0.557**2)))
    print(x)

repeat_fun(2, f, df)
But I want the results column-wise, one column per run; the function above just prints one array each time. Can anyone help me figure out this problem?
It's hard to know exactly what you mean, but I assume you want the results of f stored as columns in a DataFrame. If that's the case:
import pandas as pd
import numpy as np

df = pd.DataFrame()

def repeat_fun(times, f, args):
    for i in range(times): f(i, args)

def f(iteration, df):
    g = np.random.normal(0, 1, 32)
    mm = np.random.normal(491.22, 128.23, 32)
    x = 491.22 + (0.557*(mm - 491.22)) + (g*128.23*(np.sqrt(1 - 0.557**2)))
    df[iteration] = x

repeat_fun(2, f, df)
Run this and look at/print the contents of df and see if that helps.
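As a quick check (assuming the two-run call above), each run should appear as one column of 32 values:

print(df.shape)   # (32, 2): 32 samples per run, one column per repetition
print(df.head())  # columns are labelled 0 and 1 by the loop index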
