In my following code i m running a lorentz chaotic equation from which i will get random numbers in terms of xs , ys and zs
import numpy as np
def lorenz(x, y, z, a=10,b=8/3,c=28 ):
x_dot = a*(y -x)
y_dot = - y +c*x - x*z
z_dot = -b*z + x*y
return x_dot, y_dot, z_dot
dt = 0.01
num_steps = 10000
# Need one more for the initial values
xs = np.empty(num_steps + 1)
ys = np.empty(num_steps + 1)
zs = np.empty(num_steps + 1)
# Set initial values
xs[0], ys[0], zs[0]= (1,1,1)
# Step through "time", calculating the partial derivatives at the current point
# and using them to estimate the next point
for i in range(num_steps):
x_dot, y_dot, z_dot= lorenz(xs[i], ys[i], zs[i])
xs[i + 1] = xs[i] + (x_dot * dt)
ys[i + 1] = ys[i] + (y_dot * dt)
zs[i + 1] = zs[i] + (z_dot * dt)
I am actually trying to test the xs, ys and zs value for random number generating test via NIST 800 by using the code below
from __future__ import print_function
import math
from fractions import Fraction
from scipy.special import gamma, gammainc, gammaincc
# from gamma_functions import *
import numpy
import cmath
import random
#ones_table = [bin(i)[2:].count('1') for i in range(256)]
def count_ones_zeroes(bits):
ones = 0
zeroes = 0
for bit in bits:
if (bit == 1):
ones += 1
else:
zeroes += 1
return (zeroes,ones)
def runs_test(bits):
n = len(bits)
zeroes,ones = count_ones_zeroes(bits)
prop = float(ones)/float(n)
print(" prop ",prop)
tau = 2.0/math.sqrt(n)
print(" tau ",tau)
if abs(prop-0.5) > tau:
return (False,0.0,None)
vobs = 1.0
for i in range(n-1):
if bits[i] != bits[i+1]:
vobs += 1.0
print(" vobs ",vobs)
p = math.erfc(abs(vobs - (2.0*n*prop*(1.0-prop)))/(2.0*math.sqrt(2.0*n)*prop*(1-prop) ))
success = (p >= 0.01)
return (success,p,None)
print(runs_test(xs))
#%%
from __future__ import print_function
import math
def count_ones_zeroes(bits):
ones = 0
zeroes = 0
for bit in bits:
if (bit == 1):
ones += 1
else:
zeroes += 1
return (zeroes,ones)
def monobit_test(bits):
n = len(bits)
zeroes,ones = count_ones_zeroes(bits)
s = abs(ones-zeroes)
print(" Ones count = %d" % ones)
print(" Zeroes count = %d" % zeroes)
p = math.erfc(float(s)/(math.sqrt(float(n)) * math.sqrt(2.0)))
success = (p >= 0.01)
return (success,p,None)
print(runs_test(xs))
the output which i m getting is false i.e
output:
prop 0.00019998000199980003
tau 0.01999900007499375
(False, 0.0, None)
what should i do now?
The Lorenz system is chaotic, not random. You implemented the differential equation solver well, but it seems that count_ones_zeroes doesn't do what its name implies, at least, not on the data you provide. on xs, it returns that (zeroes, ones) = (9999, 2), which is not what you want. The code checks the value within the xs array, i.e. an x value (e.g. 8.2) against 1, but x is a float between -20 and 20, so it will be usually non1, and will be counted as 0. Only x==1 will be counted as ones.
In python, int/int results in float, so there is no need to cast it to float, in contrast to e.g. C or C++, so instead of prop = float(ones)/float(n), you can write prop = ones/n Similar statements hold for +,- and *
Related
I'm trying to write a function to evaluate the probability mass function for the bivariate poisson distribution.
This is easy when all of the parameters (x, y, theta1, theta2, theta0) are scalars, but tricky to scale up without loops to allow these parameters to be vectors. I need it to scale such that, for:
theta0 being a scalar - the "correlation parameter" in the equation
theta1 and theta2 having length l
x, y both having length n
the output array would have shape (l, n, n). For example, a slice [j, :, :] from the output array would look like:
The first part (the constant, before the summation) I think i've figured out:
import numpy as np
from scipy.special import factorial
def constant(theta1, theta2, theta0, x, y):
exponential_part = np.exp(-(theta1 + theta2 + theta0)).reshape(-1, 1, 1)
x = np.tile(x, (len(x), 1)).transpose()
y = np.tile(y, (len(y), 1))
double_factorial = (np.power(np.array(theta1).reshape(-1, 1, 1), x)/factorial(x)) * \
(np.power(np.array(theta2).reshape(-1, 1, 1), y)/factorial(y))
return exponential_part * double_factorial
But I'm struggling with the summation part. How can I vectorize a summation where the limits depend on variable arrays?
I think I have this figured out, based on the approach that #w-m suggests: calculate every possible summation term which could appear, based on the maximum x or y value which appears, and use a mask to get rid of the ones you don't want. Assuming you have your x and y terms go from 0 to N, in consecutive order, this is calculating up to three times more terms than are actually required, but this is offset by getting to use vectorization.
Reference implementation
I wrote this by first writing a pure-Python reference implementation, which just implements your problem using loops. With 4 nested loops, it's not exactly fast, but it's handy to have while testing the numpy version.
import numpy as np
from scipy.special import factorial, comb
import operator as op
from functools import reduce
def choose(n, r):
# https://stackoverflow.com/a/4941932/530160
r = min(r, n-r)
numer = reduce(op.mul, range(n, n-r, -1), 1)
denom = reduce(op.mul, range(1, r+1), 1)
return numer // denom # or / in Python 2
def reference_impl_constant(s_theta1, s_theta2, s_theta0, s_x, s_y):
# Cast to float to prevent overflow
s_theta1 = float(s_theta1)
s_theta2 = float(s_theta2)
s_theta0 = float(s_theta0)
s_x = float(s_x)
s_y = float(s_y)
term1 = np.exp(-(s_theta1 + s_theta2 + s_theta0))
term2 = (s_theta1 ** s_x / factorial(s_x))
term3 = (s_theta2 ** s_y / factorial(s_y))
assert term1 >= 0
assert term2 >= 0
assert term3 >= 0
return term1 * term2 * term3
def reference_impl_constant_loop(theta1, theta2, theta0, x, y):
theta_len = theta1.shape[0]
xy_len = x.shape[0]
constant_array = np.zeros((theta_len, xy_len, xy_len))
for i in range(theta_len):
for j in range(xy_len):
for k in range(xy_len):
s_theta1 = theta1[i]
s_theta2 = theta2[i]
s_theta0 = theta0
s_x = x[j]
s_y = y[k]
constant_term = reference_impl_constant(s_theta1, s_theta2, s_theta0, s_x, s_y)
assert constant_term >= 0
constant_array[i, j, k] = constant_term
return constant_array
def reference_impl_summation(s_theta1, s_theta2, s_theta0, s_x, s_y):
sum_ = 0
for i in range(min(s_x, s_y) + 1):
sum_ += choose(s_x, i) * choose(s_y, i) * factorial(i) * ((s_theta0/s_theta1/s_theta2) ** i)
assert sum_ >= 0
return sum_
def reference_impl_summation_loop(theta1, theta2, theta0, x, y):
theta_len = theta1.shape[0]
xy_len = x.shape[0]
summation_array = np.zeros((theta_len, xy_len, xy_len))
for i in range(theta_len):
for j in range(xy_len):
for k in range(xy_len):
s_theta1 = theta1[i]
s_theta2 = theta2[i]
s_theta0 = theta0
s_x = x[j]
s_y = y[k]
summation_term = reference_impl_summation(s_theta1, s_theta2, s_theta0, s_x, s_y)
assert summation_term >= 0
summation_array[i, j, k] = summation_term
return summation_array
def reference_impl(theta1, theta2, theta0, x, y):
# all array inputs must be 1D
assert len(theta1.shape) == 1
assert len(theta2.shape) == 1
assert len(x.shape) == 1
assert len(y.shape) == 1
# theta vectors must have same length
theta_len = theta1.shape[0]
assert theta2.shape[0] == theta_len
# x and y must have same length
xy_len = x.shape[0]
assert y.shape[0] == xy_len
# theta0 is scalar
assert isinstance(theta0, (int, float))
constant_array = np.zeros((theta_len, xy_len, xy_len))
output = np.zeros((theta_len, xy_len, xy_len))
constant_array = reference_impl_constant_loop(theta1, theta2, theta0, x, y)
summation_array = reference_impl_summation_loop(theta1, theta2, theta0, x, y)
output = constant_array * summation_array
return output
Numpy implementation
I split the implementation of this across two functions.
The fast_constant() function calculates everything to the left of the summation symbol. The fast_summation() function calculates everything inside the summation symbol.
import numpy as np
from scipy.special import factorial, comb
def fast_summation(theta1, theta2, theta0, x, y):
x = np.tile(x, (len(x), 1)).transpose()
y = np.tile(y, (len(y), 1))
sum_limit = np.minimum(x, y)
max_sum_limit = np.max(sum_limit)
i = np.arange(max_sum_limit + 1).reshape(-1, 1, 1)
summation_mask = (i <= sum_limit)
theta_ratio = (theta0 / (theta1 * theta2)).reshape(-1, 1, 1, 1)
theta_to_power = np.power(theta_ratio, i)
terms = comb(x, i) * comb(y, i) * factorial(i) * theta_to_power
# mask out terms which aren't part of sum
terms *= summation_mask
# axis 0 is theta
# axis 1 is i
# axis 2 & 3 are x and y
# so sum across axis 1
terms = terms.sum(axis=1)
return terms
def fast_constant(theta1, theta2, theta0, x, y):
theta1 = theta1.astype('float64')
theta2 = theta2.astype('float64')
exponential_part = np.exp(-(theta1 + theta2 + theta0)).reshape(-1, 1, 1)
# x and y must be 1D
assert len(x.shape) == 1
assert len(y.shape) == 1
# x and y must have same shape
assert x.shape == y.shape
x_len, y_len = x.shape[0], y.shape[0]
x = x.reshape((x_len, 1))
y = y.reshape((1, y_len))
double_factorial = (np.power(np.array(theta1).reshape(-1, 1, 1), x)/factorial(x)) * \
(np.power(np.array(theta2).reshape(-1, 1, 1), y)/factorial(y))
return exponential_part * double_factorial
def fast_impl(theta1, theta2, theta0, x, y):
return fast_summation(theta1, theta2, theta0, x, y) * fast_constant(theta1, theta2, theta0, x, y)
Benchmarking
Assuming that X and Y range from 0 to 20, and that theta is centered somewhere inside that range, I get the result that the numpy version is roughly 280 times faster than the pure python reference.
Numerical stability
I'm unsure how numerically stable this is. For example, when I center theta at 100, I get a floating-point overflow. Typically, when computing an expression which has lots of choose and factorial expressions inside it, you'll use some mathematical equivalent which results in smaller intermediate sums. In this case I have so little understanding of the math that I don't know how you'd do that.
This is more of a computational physics problem, and I've asked it on physics stack exchange, but no answers on there. This is, I suppose, a mix of the disciplines on here and there (and maybe even mathematics stack exchange), so finding the right place to post is a task in of itself apparently...
I'm attempting to use Crank-Nicolson scheme to solve the TDSE in 1D. The initial wave is a real Gaussian that has been normalised wrt its probability density. As the solution evolves, a depression grows in the central peak of the real part of the wave, and the imaginary part's central trough is perhaps a bit higher than I expect (image below).
Does this behaviour seem reasonable? I have searched around and not seen questions/figures that are similar. I've tested another person's code from Github and it exhibits the same behaviour, which makes me feel a bit better. But I still think the center peak should just decrease in height and increase in width. The likelihood of me getting a physics-based explanation is relatively low here I'd assume, but a computational-based explanation on errors I may have made is more likely.
I'm happy to give more information, for example my code, or the matrices used in the scheme, etc. Thanks in advance!
Here's a link to GIF of time evolution:
And the part of my code relevant to solving the 1D TDSE:
(pretty much the entire thing except the plotting)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
# Define function for norm.
def normf(dxc, uc, ic):
return sum(dxc * np.square(np.abs(uc[ic, :])))
# Define function for expectation value of position.
def xexpf(dxc, xc, uc, ic):
return sum(dxc * xc * np.square(np.abs(uc[ic, :])))
# Define function for expectation value of squared position.
def xexpsf(dxc, xc, uc, ic):
return sum(dxc * np.square(xc) * np.square(np.abs(uc[ic, :])))
# Define function for standard deviation.
def sdaf(xexpc, xexpsc, ic):
return np.sqrt(xexpsc[ic] - np.square(xexpc[ic]))
# Time t: t0 =< t =< tf. Have N steps at which to evaluate the CN scheme. The
# time interval is dt. decp: variable for plotting to certain number of decimal
# places.
t0 = 0
tf = 20
N = 200
dt = tf / N
t = np.linspace(t0, tf, num = N + 1, endpoint = True)
decp = str(dt)[::-1].find('.')
# Initialise array for filling with norm values at each time step.
norm = np.zeros(len(t))
# Initialise array for expectation value of position.
xexp = np.zeros(len(t))
# Initialise array for expectation value of squared position.
xexps = np.zeros(len(t))
# Initialise array for alternate standard deviation.
sda = np.zeros(len(t))
# Position x: -a =< x =< a. M is an even number. There are M + 1 total discrete
# positions, for the points to be symmetric and centred at x = 0.
a = 100
M = 1200
dx = (2 * a) / M
x = np.linspace(-a, a, num = M + 1, endpoint = True)
# The gaussian function u diffuses over time. sd sets the width of gaussian. u0
# is the initial gaussian at t0.
sd = 1
var = np.power(sd, 2)
mu = 0
u0 = np.sqrt(1 / np.sqrt(np.pi * var)) * np.exp(-np.power(x - mu, 2) / (2 * \
var))
u = np.zeros([len(t), len(x)], dtype = 'complex_')
u[0, :] = u0
# Normalise u.
u[0, :] = u[0, :] / np.sqrt(normf(dx, u, 0))
# Set coefficients of CN scheme.
alpha = dt * -1j / (4 * np.power(dx, 2))
beta = dt * 1j / (4 * np.power(dx, 2))
# Tridiagonal matrices Al and AR. Al to be solved using Thomas algorithm.
Al = np.zeros([len(x), len(x)], dtype = 'complex_')
for i in range (0, M):
Al[i + 1, i] = alpha
Al[i, i] = 1 - (2 * alpha)
Al[i, i + 1] = alpha
# Corner elements for BC's.
Al[M, M], Al[0, 0] = 1 - alpha, 1 - alpha
Ar = np.zeros([len(x), len(x)], dtype = 'complex_')
for i in range (0, M):
Ar[i + 1, i] = beta
Ar[i, i] = 1 - (2 * beta)
Ar[i, i + 1] = beta
# Corner elements for BC's.
Ar[M, M], Ar[0, 0] = 1 - 2*beta, 1 - beta
# Thomas algorithm variables. Following similar naming as in Wiki article.
a = np.diag(Al, -1)
b = np.diag(Al)
c = np.diag(Al, 1)
NT = len(b)
cp = np.zeros(NT - 1, dtype = 'complex_')
for n in range(0, NT - 1):
if n == 0:
cp[n] = c[n] / b[n]
else:
cp[n] = c[n] / (b[n] - (a[n - 1] * cp[n - 1]))
d = np.zeros(NT, dtype = 'complex_')
dp = np.zeros(NT, dtype = 'complex_')
# Iterate over each time step to solve CN method. Maintain boundary
# conditions. Keep track of standard deviation.
for i in range(0, N):
# BC's.
u[i, 0], u[i, M] = 0, 0
# Find RHS.
d = np.dot(Ar, u[i, :])
for n in range(0, NT):
if n == 0:
dp[n] = d[n] / b[n]
else:
dp[n] = (d[n] - (a[n - 1] * dp[n - 1])) / (b[n] - (a[n - 1] * \
cp[n - 1]))
nc = NT - 1
while nc > -1:
if nc == NT - 1:
u[i + 1, nc] = dp[nc]
nc -= 1
else:
u[i + 1, nc] = dp[nc] - (cp[nc] * u[i + 1, nc + 1])
nc -= 1
norm[i] = normf(dx, u, i)
xexp[i] = xexpf(dx, x, u, i)
xexps[i] = xexpsf(dx, x, u, i)
sda[i] = sdaf(xexp, xexps, i)
# Fill in final norm value.
norm[N] = normf(dx, u, N)
# Fill in final position expectation value.
xexp[N] = xexpf(dx, x, u, N)
# Fill in final squared position expectation value.
xexps[N] = xexpsf(dx, x, u, N)
# Fill in final standard deviation value.
sda[N] = sdaf(xexp, xexps, N)
I'm new to python so the code may not be the best. I'm trying to find the minimum Total Cost (TotalC) and the corresponding m,k and xM values that go with this minimum cost. I'm not sure how to do this. I have tried using min(TotalC) however this gives an error within the loop or outside the loop only returns the value of TotalC and not the corresponding m, k, and xM values. Any help would be appreciated. This section is at the end of the code, I have included my entire code.
I have tried using
minIndex = TotalC.argmin()
but I'm not sure how to use it and it only returns 0 each time.
import numpy as np
import matplotlib.pyplot as plt
def Load(x):
Fpeak = (1000 + (9*(x**2) - (183*x))) *1000 #Fpeak in N
td = (20 - ((0.12)*(x**2)) + (4.2*(x))) / 1000 #td in s
return Fpeak, td
#####################################################################################################
####################### Part 2 ########################
def displacement(m,k,x,dt): #Displacement function
Fpeak, td = Load(x) #Load Function from step 1
w = np.sqrt(k/m) # Natural circular frequency
T = 2 * np.pi /w #Natural period of blast (s)
time = np.arange(0,2*T,0.001) #Time array with range (0 - 2*T) with steps of 2*T/100
zt = [] #Create a lsit to store displacement values
for t in time:
if (t <= td):
zt.append((Fpeak/k) * (1 - np.cos(w*t)) + (Fpeak/(k*td)) * ((np.sin(w*t)/w) - t))
else:
zt.append((Fpeak/(k*w*td)) * (np.sin(w*t) - np.sin(w*(t-td))) - ((Fpeak/k) * np.cos(w*t)))
zmax=max(zt) #Find the max displacement from the list of zt values
return zmax #Return max displacement
k = 1E6
m = 200
dt = 0.0001
x = 0
z = displacement(m,k,x,dt)
###################################################################################
############### Part 3 #######################
# k = 1E6 , m = 200kg , Deflection = 0.1m
k_values = np.arange(1E6, 7E6, ((7E6-1E6)/10)) #List of k values between min and max (1E6 and 7E6).
m_values = np.arange(200,1200,((1200-200)/10)) #List of m values between min and max 200kg and 1200kg
xM = []
for k in k_values: # values of k
for m in m_values: # values of m within k for loop
def bisector(m,k,dpoint,dt): #dpoint = decimal point accuracy
xL = 0
xR = 10
xM = (xL + xR)/2
zmax = 99
while round(zmax, dpoint) !=0.1:
zmax = displacement(m,k,xM,dt)
if zmax > 0.1:
xL = xM
xM = (xL + xR)/2
else:
xR = xM
xM = (xL + xR)/2
return xM
xM = bisector(m, k, 4, 0.001)
print('xM value =',xM)
#####################################################
#######Step 4
def cost (m,k,xM):
Ck = 900 + 825*((k/1E6)**2) - (1725*(k/1E6))
Cm = 10*m - 2000
Cx = 2400*((xM**2)/4)
TotalC = Ck + Cm + Cx
minIndex = TotalC.argmin(0)
print(minIndex)
return TotalC
TotalC = cost(m, k, xM)
minIndex = TotalC.argmin()
print(minIndex)
print([xM, m, k, TotalC])
argmin() returns the index of a minimum value. If you are looking for the minimum itself, try using .min(). There is also a possibility that 0 is the lowest value in Your array so bear that in mind
I am trying to create a program that solves the mass-spring-damper system using backward differentiating, the only problem is that I am running into an index error that I am not sure how to solve:
import numpy as np
import matplotlib.pyplot as plt
def MSD_Solver(m,b,K):
#input: m = mass, b = damping ratio, K = spring constant
#output: (t,x) time vs position
tinitial = 0
tfinal = 15
step = .005
t = np.linspace(tinitial,tfinal,step)
x = np.zeros_like(t)
x[0]=0
x[1]=0
for k in range (len(t)-1): # extra element so subtract by 1
x[k] = (t**2)/((m+b)*t+(t**2)*k) + (x[k-2](-m))/((m+b)*t+(t**2)*k) + (x[k-1]((2*m)+(b*t)))/((m+b)*t+(t**2)*k)
return plt.plot(t,x)
print(MSD_Solver(1,.5,5)),MSD_Solver(1,1,5),MSD_Solver(1,2,5)
plt.show()
The linspace doc shows that the third argument is the number of items, not the step. Your step value got truncated to 0, so the returned array for t was empty. As a result, x has no elements, and x[0] is out of range.
Try this:
tinitial = 0
tfinal = 15
step = .005
num = (tfinal - tinitial) / step + 1
t = np.linspace(tinitial,tfinal,num)
This will get you to the semantic errors in your complex computation.
You want, probably(?), use first and second order difference quotients to discretize
m*x''(t) + b*x'(t) + K*x(t) = 1
to
m*(x[j+1]-2*x[j]+x[j-1]) + 0.5*dt*b*(x[j+1]-x[j-1]) + dt^2*K*x[j] = dt**2
so that
x[j+1] = ( dt**2 + (2*m-K*dt**2)*x[j] - (m-0.5*dt*b)*x[j-1] ) / (m+0.5*dt*b)
In code
def MSD_Solver(m,b,K):
#input: m = mass, b = damping ratio, K = spring constant
#output: (t,x) time vs position
tinitial = 0
tfinal = 15
step = .005
t = np.arange(tinitial,tfinal,step)
x = np.zeros_like(t)
dt = t[1]-t[0] # use the actual time step
x[0:2] = [ 0, 0]
for j in range(1,len(t)-1):
x[j+1] = ( dt**2 + (2*m-K*dt**2)*x[j] - (m-0.5*dt*b)*x[j-1] ) / (m+0.5*dt*b)
return t,x
t,x = MSD_Solver(1,.5,5)
plt.plot(t,x); plt.show();
Are there any pure-python implementations of the inverse error function?
I know that SciPy has scipy.special.erfinv(), but that relies on some C extensions. I'd like a pure python implementation.
I've tried writing my own using the Wikipedia and Wolfram references, but it always seems to diverge from the true value when the arg is > 0.9.
I've also attempted to port the underlying C code that Scipy uses (ndtri.c and the cephes polevl.c functions) but that's also not passing my unit tests.
Edit: As requested, I've added the ported code.
Docstrings (and doctests) have been removed because they're longer than the functions. I haven't yet put much effort into making the port more pythonic - I'll worry about that once I get something that passes unit tests.
Supporting functions from cephes polevl.c
def polevl(x, coefs, N):
ans = 0
power = len(coefs) - 1
for coef in coefs[:N]:
ans += coef * x**power
power -= 1
return ans
def p1evl(x, coefs, N):
return polevl(x, [1] + coefs, N)
Main Inverse Error Function
def inv_erf(z):
if z < -1 or z > 1:
raise ValueError("`z` must be between -1 and 1 inclusive")
if z == 0:
return 0
if z == 1:
return math.inf
if z == -1:
return -math.inf
# From scipy special/cephes/ndrti.c
def ndtri(y):
# approximation for 0 <= abs(z - 0.5) <= 3/8
P0 = [
-5.99633501014107895267E1,
9.80010754185999661536E1,
-5.66762857469070293439E1,
1.39312609387279679503E1,
-1.23916583867381258016E0,
]
Q0 = [
1.95448858338141759834E0,
4.67627912898881538453E0,
8.63602421390890590575E1,
-2.25462687854119370527E2,
2.00260212380060660359E2,
-8.20372256168333339912E1,
1.59056225126211695515E1,
-1.18331621121330003142E0,
]
# Approximation for interval z = sqrt(-2 log y ) between 2 and 8
# i.e., y between exp(-2) = .135 and exp(-32) = 1.27e-14.
P1 = [
4.05544892305962419923E0,
3.15251094599893866154E1,
5.71628192246421288162E1,
4.40805073893200834700E1,
1.46849561928858024014E1,
2.18663306850790267539E0,
-1.40256079171354495875E-1,
-3.50424626827848203418E-2,
-8.57456785154685413611E-4,
]
Q1 = [
1.57799883256466749731E1,
4.53907635128879210584E1,
4.13172038254672030440E1,
1.50425385692907503408E1,
2.50464946208309415979E0,
-1.42182922854787788574E-1,
-3.80806407691578277194E-2,
-9.33259480895457427372E-4,
]
# Approximation for interval z = sqrt(-2 log y ) between 8 and 64
# i.e., y between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.
P2 = [
3.23774891776946035970E0,
6.91522889068984211695E0,
3.93881025292474443415E0,
1.33303460815807542389E0,
2.01485389549179081538E-1,
1.23716634817820021358E-2,
3.01581553508235416007E-4,
2.65806974686737550832E-6,
6.23974539184983293730E-9,
]
Q2 = [
6.02427039364742014255E0,
3.67983563856160859403E0,
1.37702099489081330271E0,
2.16236993594496635890E-1,
1.34204006088543189037E-2,
3.28014464682127739104E-4,
2.89247864745380683936E-6,
6.79019408009981274425E-9,
]
s2pi = 2.50662827463100050242
code = 1
if y > (1.0 - 0.13533528323661269189): # 0.135... = exp(-2)
y = 1.0 - y
code = 0
if y > 0.13533528323661269189:
y = y - 0.5
y2 = y * y
x = y + y * (y2 * polevl(y2, P0, 4) / p1evl(y2, Q0, 8))
x = x * s2pi
return x
x = math.sqrt(-2.0 * math.log(y))
x0 = x - math.log(x) / x
z = 1.0 / x
if x < 8.0: # y > exp(-32) = 1.2664165549e-14
x1 = z * polevl(z, P1, 8) / p1evl(z, Q1, 8)
else:
x1 = z * polevl(z, P2, 8) / p1evl(z, Q2, 8)
x = x0 - x1
if code != 0:
x = -x
return x
result = ndtri((z + 1) / 2.0) / math.sqrt(2)
return result
I think the error in your code is in the for loop over coefficients in the polevl function. If you replace what you have with the function below everything seems to work.
def polevl(x, coefs, N):
ans = 0
power = len(coefs) - 1
for coef in coefs:
ans += coef * x**power
power -= 1
return ans
I have tested it against scipy's implementation with the following code:
import numpy as np
from scipy.special import erfinv
N = 100000
x = np.random.rand(N) - 1.
# Calculate the inverse of the error function
y = np.zeros(N)
for i in range(N):
y[i] = inv_erf(x[i])
assert np.allclose(y, erfinv(x))
sympy? some digging may be needed to see how its implemented internally http://docs.sympy.org/latest/modules/functions/special.html#sympy.functions.special.error_functions.erfinv
from sympy import erfinv
erfinv(0.9).evalf(30)
1.16308715367667425688580351562