For consistency, I want to reimplement the rotation-matrix-to-quaternion conversion from the C++ Eigen library in Python.
The C++ implementation can be found here, and below is my Python implementation:
def rotationMatrixToQuaternion3(m):
    # q0 = qw
    t = np.matrix.trace(m)
    q = np.asarray([0.0, 0.0, 0.0, 0.0], dtype=np.float64)

    if t > 0:
        t = np.sqrt(t + 1)
        q[0] = 0.5 * t
        t = 0.5 / t
        q[1] = (m[2, 1] - m[1, 2]) * t
        q[2] = (m[0, 2] - m[2, 0]) * t
        q[3] = (m[1, 0] - m[0, 1]) * t
    else:
        i = 0
        if m[1, 1] > m[0, 0]:
            i = 1
        if m[2, 2] > m[i, i]:
            i = 2
        j = (i + 1) % 3
        k = (j + 1) % 3

        t = np.sqrt(m[i, i] - m[j, j] - m[k, k] + 1)
        q[i] = 0.5 * t
        t = 0.5 / t
        q[0] = (m[k, j] - m[j, k]) * t
        q[j] = (m[j, i] + m[i, j]) * t
        q[k] = (m[k, i] + m[i, k]) * t

    return q
Here are some example results:
rotation matrix:
[[-0.00998882 0.01194957 -0.99987871]
[ 0.49223613 -0.87032691 -0.01531875]
[-0.8704044 -0.49232944 0.00281153]]
python implementation - qw, qx, qy, qz:
[-0.68145553 -0.18496647 0.68613542 0. ]
eigen:
-0.686135 -0.174997 -0.681456 0.184966
rotation matrix:
[[ 0.01541426 0.02293597 -0.9996181 ]
[ 0.49081359 -0.87117607 -0.01242048]
[-0.87112825 -0.49043469 -0.02468582]]
python implementation - qw, qx, qy, qz:
[-0.17288173 0.18580601 -0.67658628 0. ]
eigen:
-0.686135 -0.174997 -0.681456 0.184966
rotation matrix:
[[ 0.03744363 -0.01068005 -0.99924167]
[ 0.48694091 -0.87299945 0.02757743]
[-0.87263195 -0.48760425 -0.02748771]]
python implementation - qw, qx, qy, qz:
[-0.18503815 0.17105894 -0.67232212 0. ]
eigen:
-0.672322 -0.185038 -0.696048 0.171059
Help would be highly appreciated!
Thanks,
Johannes
It seems that the order used by Eigen is [x, y, z, w], see the same source file that you base your implementation on.
So the indices that you use should be changed in the following way:
def rotationMatrixToQuaternion3(m):
    # q0 = qw
    t = np.matrix.trace(m)
    q = np.asarray([0.0, 0.0, 0.0, 0.0], dtype=np.float64)

    if t > 0:
        t = np.sqrt(t + 1)
        q[3] = 0.5 * t
        t = 0.5 / t
        q[0] = (m[2, 1] - m[1, 2]) * t
        q[1] = (m[0, 2] - m[2, 0]) * t
        q[2] = (m[1, 0] - m[0, 1]) * t
    else:
        i = 0
        if m[1, 1] > m[0, 0]:
            i = 1
        if m[2, 2] > m[i, i]:
            i = 2
        j = (i + 1) % 3
        k = (j + 1) % 3

        t = np.sqrt(m[i, i] - m[j, j] - m[k, k] + 1)
        q[i] = 0.5 * t
        t = 0.5 / t
        q[3] = (m[k, j] - m[j, k]) * t
        q[j] = (m[j, i] + m[i, j]) * t
        q[k] = (m[k, i] + m[i, k]) * t

    return q
And the returned quaternion is [x, y, z, w].
Running the modified code did not produce the same results that you report for Eigen. Unfortunately, I do not know how to reproduce the Eigen results.
However, there is a SciPy implementation of matrix-to-quaternion conversion, which gives the same results as the above implementation (up to multiplication of the vector by -1, which is an inherent ambiguity of the quaternion and is thus implementation-dependent):
from scipy.spatial import transform

mat = np.array([[-0.00998882,  0.01194957, -0.99987871],
                [ 0.49223613, -0.87032691, -0.01531875],
                [-0.8704044,  -0.49232944,  0.00281153]])
quat_sp = transform.Rotation.from_matrix(mat).as_quat()
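As a quick sanity check (a sketch continuing the snippet above, assuming the corrected rotationMatrixToQuaternion3 is defined), the two conversions should agree up to that overall sign:

q_xyzw = rotationMatrixToQuaternion3(mat)
# q and -q describe the same rotation, so accept either sign
assert np.allclose(q_xyzw, quat_sp) or np.allclose(q_xyzw, -quat_sp)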
So I think the above implementation is correct, and the problem was in the invocation of Eigen.
I am trying to invert a set of maps using this answer here. I used two of his methods, so there is more detail on how they work in his answer. I also left some comments out to shorten the code.
I have my own camera matrix and distortion coefficients that I use to create an x and y map with cv2.initUndistortRectifyMap(), but when I pass them to invert_map() I get the out-of-bounds error shown below.
None of this (except the bottom part) is my code and it's pretty advanced stuff, so I have no clue how to debug it. And I don't have enough credit to comment on the original answer. Anyone got a solution?
import numpy as np
import cv2 as cv2
from scipy import ndimage as ndi
from matplotlib import pyplot as plt
import glob
def bilinear_inverse(p, vertices, numiter=4):
    p = np.asarray(p)
    v = np.asarray(vertices)
    sh = p.shape[1:]
    if v.ndim == 2:
        v = np.expand_dims(v, axis=tuple(range(2, 2 + len(sh))))

    # Start in the center
    s = .5 * np.ones((2,) + sh)
    s0, s1 = s
    for k in range(numiter):
        # Residual
        r = v[0] * (1 - s0) * (1 - s1) + v[1] * s0 * (1 - s1) + v[2] * s0 * s1 + v[3] * (1 - s0) * s1 - p

        # Jacobian
        J11 = -v[0, 0] * (1 - s1) + v[1, 0] * (1 - s1) + v[2, 0] * s1 - v[3, 0] * s1
        J21 = -v[0, 1] * (1 - s1) + v[1, 1] * (1 - s1) + v[2, 1] * s1 - v[3, 1] * s1
        J12 = -v[0, 0] * (1 - s0) - v[1, 0] * s0 + v[2, 0] * s0 + v[3, 0] * (1 - s0)
        J22 = -v[0, 1] * (1 - s0) - v[1, 1] * s0 + v[2, 1] * s0 + v[3, 1] * (1 - s0)

        inv_detJ = 1. / (J11 * J22 - J12 * J21)

        s0 -= inv_detJ * (J22 * r[0] - J12 * r[1])
        s1 -= inv_detJ * (-J21 * r[0] + J11 * r[1])

    return s
def invert_map(xmap, ymap, diagnostics=False):
    """
    Generate the inverse of deformation map defined by (xmap, ymap) using inverse bilinear interpolation.
    """
    # Generate quadrilaterals from mapped grid points.
    quads = np.array([[ymap[:-1, :-1], xmap[:-1, :-1]],
                      [ymap[1:, :-1], xmap[1:, :-1]],
                      [ymap[1:, 1:], xmap[1:, 1:]],
                      [ymap[:-1, 1:], xmap[:-1, 1:]]])

    # Range of indices possibly within each quadrilateral
    x0 = np.floor(quads[:, 1, ...].min(axis=0)).astype(int)
    x1 = np.ceil(quads[:, 1, ...].max(axis=0)).astype(int)
    y0 = np.floor(quads[:, 0, ...].min(axis=0)).astype(int)
    y1 = np.ceil(quads[:, 0, ...].max(axis=0)).astype(int)

    # Quad indices
    i0, j0 = np.indices(x0.shape)

    # Offset of destination map
    x0_offset = x0.min()
    y0_offset = y0.min()

    # Index range in x and y (per quad)
    xN = x1 - x0 + 1
    yN = y1 - y0 + 1

    # Shape of destination array
    sh_dest = (1 + x1.max() - x0_offset, 1 + y1.max() - y0_offset)

    # Coordinates of destination array
    yy_dest, xx_dest = np.indices(sh_dest)

    xmap1 = np.zeros(sh_dest)
    ymap1 = np.zeros(sh_dest)
    TN = np.zeros(sh_dest, dtype=int)

    # Smallish number to avoid missing point lying on edges
    epsilon = .01

    # Loop through indices possibly within quads
    for ix in range(xN.max()):
        for iy in range(yN.max()):
            # Work only with quads whose bounding box contain indices
            valid = (xN > ix) * (yN > iy)

            # Local points to check
            p = np.array([y0[valid] + ix, x0[valid] + iy])

            # Map the position of the point in the quad
            s = bilinear_inverse(p, quads[:, :, valid])

            # s out of unit square means p out of quad
            # Keep some epsilon around to avoid missing edges
            in_quad = np.all((s > -epsilon) * (s < (1 + epsilon)), axis=0)

            # Add found indices
            ii = p[0, in_quad] - y0_offset
            jj = p[1, in_quad] - x0_offset
            ymap1[ii, jj] += i0[valid][in_quad] + s[0][in_quad]
            xmap1[ii, jj] += j0[valid][in_quad] + s[1][in_quad]

            # Increment count
            TN[ii, jj] += 1

    ymap1 /= TN + (TN == 0)
    xmap1 /= TN + (TN == 0)

    if diagnostics:
        diag = {'x_offset': x0_offset,
                'y_offset': y0_offset,
                'mask': TN > 0}
        return xmap1, ymap1, diag
    else:
        return xmap1, ymap1
# camera matrix and distortion coefficients that I supplied
cam_matrix = np.array([[1223.07784, 0, 926.80065],
                       [0, 1231.71291, 546.10496],
                       [0, 0, 1]], dtype='float32')
distortion_profile = np.array([-0.32077, 0.15041, 0.001004, 0.00028, -0.04252], dtype='float32')

# get current maps
mapx, mapy = cv2.initUndistortRectifyMap(cam_matrix, distortion_profile, None, cam_matrix, (1920, 1080), 5)

# invert the maps
mapx_invert, mapy_invert = invert_map(mapx, mapy)

# apply mapping to image
inversed = cv2.remap(img, mapx_invert, mapy_invert, cv2.INTER_LINEAR)
cv2.imwrite('inversed.png', inversed)
Error:
File "c:\Users\...\redist_image2.py", line 121, in invert_map
ymap1[ii, jj] += i0[valid][in_quad] + s[0][in_quad]
IndexError: index 1382 is out of bounds for axis 1 with size 1020
I'm having trouble with the slow computation of my Python code. Based on the pycallgraph below, the bottleneck seems to be the function miepython.miepython.mie_S1_S2 (highlighted in pink), which takes 0.47 seconds per call.
The source code for this module is as follows:
import numpy as np
from numba import njit, int32, float64, complex128
__all__ = ('ez_mie',
'ez_intensities',
'generate_mie_costheta',
'i_par',
'i_per',
'i_unpolarized',
'mie',
'mie_S1_S2',
'mie_cdf',
'mie_mu_with_uniform_cdf',
)
@njit((complex128, float64, float64[:]), cache=True)
def _mie_S1_S2(m, x, mu):
    """
    Calculate the scattering amplitude functions for spheres.

    The amplitude functions have been normalized so that when integrated
    over all 4*pi solid angles, the integral will be qext*pi*x**2.

    The units are weird, sr**(-0.5)

    Args:
        m: the complex index of refraction of the sphere
        x: the size parameter of the sphere
        mu: array of angles, cos(theta), to calculate scattering amplitudes

    Returns:
        S1, S2: the scattering amplitudes at each angle mu [sr**(-0.5)]
    """
    nstop = int(x + 4.05 * x**0.33333 + 2.0) + 1
    a = np.zeros(nstop - 1, dtype=np.complex128)
    b = np.zeros(nstop - 1, dtype=np.complex128)
    _mie_An_Bn(m, x, a, b)

    nangles = len(mu)
    S1 = np.zeros(nangles, dtype=np.complex128)
    S2 = np.zeros(nangles, dtype=np.complex128)

    nstop = len(a)
    for k in range(nangles):
        pi_nm2 = 0
        pi_nm1 = 1
        for n in range(1, nstop):
            tau_nm1 = n * mu[k] * pi_nm1 - (n + 1) * pi_nm2
            S1[k] += (2 * n + 1) * (pi_nm1 * a[n - 1]
                                    + tau_nm1 * b[n - 1]) / (n + 1) / n
            S2[k] += (2 * n + 1) * (tau_nm1 * a[n - 1]
                                    + pi_nm1 * b[n - 1]) / (n + 1) / n
            temp = pi_nm1
            pi_nm1 = ((2 * n + 1) * mu[k] * pi_nm1 - (n + 1) * pi_nm2) / n
            pi_nm2 = temp

    # calculate norm = sqrt(pi * Qext * x**2)
    n = np.arange(1, nstop + 1)
    norm = np.sqrt(2 * np.pi * np.sum((2 * n + 1) * (a.real + b.real)))

    S1 /= norm
    S2 /= norm

    return [S1, S2]
The source code is jitted with Numba, so I would expect it to be faster than it actually is. The total number of iterations of the nested for loops in this function is around 25,000 (len(mu)=50, len(a)-1=500).
Any ideas on how to speed up this computation? Is something hindering the fast computation of Numba? Or, do you think the computation is already fast enough?
[More details]
In the above, another function _mie_An_Bn is being used. This function is also jitted, and the source code is as follows:
@njit((complex128, float64, complex128[:], complex128[:]), cache=True)
def _mie_An_Bn(m, x, a, b):
    """
    Compute arrays of Mie coefficients A and B for a sphere.

    This estimates the size of the arrays based on Wiscombe's formula. The length
    of the arrays is chosen so that the error when the series are summed is
    around 1e-6.

    Args:
        m: the complex index of refraction of the sphere
        x: the size parameter of the sphere

    Returns:
        An, Bn: arrays of Mie coefficients
    """
    psi_nm1 = np.sin(x)                   # nm1 = n-1 = 0
    psi_n = psi_nm1 / x - np.cos(x)       # n = 1
    xi_nm1 = complex(psi_nm1, np.cos(x))
    xi_n = complex(psi_n, np.cos(x) / x + np.sin(x))

    nstop = len(a)
    if m.real > 0.0:
        D = _D_calc(m, x, nstop + 1)

        for n in range(1, nstop):
            temp = D[n] / m + n / x
            a[n - 1] = (temp * psi_n - psi_nm1) / (temp * xi_n - xi_nm1)
            temp = D[n] * m + n / x
            b[n - 1] = (temp * psi_n - psi_nm1) / (temp * xi_n - xi_nm1)
            xi = (2 * n + 1) * xi_n / x - xi_nm1
            xi_nm1 = xi_n
            xi_n = xi
            psi_nm1 = psi_n
            psi_n = xi_n.real
    else:
        for n in range(1, nstop):
            a[n - 1] = (n * psi_n / x - psi_nm1) / (n * xi_n / x - xi_nm1)
            b[n - 1] = psi_n / xi_n
            xi = (2 * n + 1) * xi_n / x - xi_nm1
            xi_nm1 = xi_n
            xi_n = xi
            psi_nm1 = psi_n
            psi_n = xi_n.real
The example inputs are like the following:
m = 1.336 - 2.462e-09j
x = 8526.95
mu = np.array([-1., -0.7500396, 0.46037385, 0.5988121, 0.67384093, 0.72468684,
               0.76421644, 0.79175856, 0.81723714, 0.83962897, 0.85924182,
               0.87641596, 0.89383665, 0.90708978, 0.91931481, 0.93067567,
               0.94073113, 0.94961222, 0.95689496, 0.96467123, 0.97138347,
               0.97791831, 0.98339434, 0.98870543, 0.99414948, 0.9975728,
               0.9989995, 0.9989995, 0.9989995, 0.9989995, 0.9989995,
               0.99899951, 0.99899951, 0.99899951, 0.99899951, 0.99899951,
               0.99899951, 0.99899951, 0.99899951, 0.99899951,
               0.99899952, 0.99899952, 0.99899952, 0.99899952, 0.99899952,
               0.99899952, 0.99899952, 0.99899952, 0.99899952, 0.99899952,
               0.99899952, 1.])
I am focusing on _mie_S1_S2 since it appears to be the most expensive function on the provided example dataset.
First of all, you can pass the parameter fastmath=True to the JIT to accelerate the computation, provided no values like +Inf, -Inf, -0 or NaN are computed.
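For example, the decorator would then look like this (just a sketch; same signature as in the question, body unchanged):

@njit((complex128, float64, float64[:]), cache=True, fastmath=True)
def _mie_S1_S2(m, x, mu):
    pass  # body exactly as in the question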
Then you can pre-compute some expensive expressions containing divisions or implicit integer-to-float conversions. Note that (2 * n + 1) / n = 2 + 1/n and (n + 1) / n = 1 + 1/n. This can be useful to reduce the number of precomputed arrays, but it did not change the performance on my machine (this may change depending on the target architecture). Note also that such a precomputation has a slight impact on the result accuracy (most of the time negligible and sometimes better than the reference implementation).
On my machine, this strategy makes the code 4.5 times faster with fastmath=True and 2.8 times faster without.
The k-based loop can be parallelized using parallel=True and prange of Numba. However, this may not always be faster on all machines (especially ones with a lot of cores) since the loop is pretty fast.
Here is the final code:
import numba as nb  # needed for nb.prange below

@njit((complex128, float64, float64[:]), cache=True, parallel=True)
def _mie_S1_S2_opt(m, x, mu):
    nstop = int(x + 4.05 * x**0.33333 + 2.0) + 1
    a = np.zeros(nstop - 1, dtype=np.complex128)
    b = np.zeros(nstop - 1, dtype=np.complex128)
    _mie_An_Bn(m, x, a, b)

    nangles = len(mu)
    S1 = np.zeros(nangles, dtype=np.complex128)
    S2 = np.zeros(nangles, dtype=np.complex128)

    factor1 = np.empty(nstop, dtype=np.float64)
    factor2 = np.empty(nstop, dtype=np.float64)
    factor3 = np.empty(nstop, dtype=np.float64)

    for n in range(1, nstop):
        factor1[n - 1] = (2 * n + 1) / (n + 1) / n
        factor2[n - 1] = (2 * n + 1) / n
        factor3[n - 1] = (n + 1) / n

    nstop = len(a)
    for k in nb.prange(nangles):
        pi_nm2 = 0
        pi_nm1 = 1
        for n in range(1, nstop):
            i = n - 1
            tau_nm1 = n * mu[k] * pi_nm1 - (n + 1.0) * pi_nm2
            S1[k] += factor1[i] * (pi_nm1 * a[i] + tau_nm1 * b[i])
            S2[k] += factor1[i] * (tau_nm1 * a[i] + pi_nm1 * b[i])
            temp = pi_nm1
            pi_nm1 = factor2[i] * mu[k] * pi_nm1 - factor3[i] * pi_nm2
            pi_nm2 = temp

    # calculate norm = sqrt(pi * Qext * x**2)
    n = np.arange(1, nstop + 1)
    norm = np.sqrt(2 * np.pi * np.sum((2 * n + 1) * (a.real + b.real)))

    S1 /= norm
    S2 /= norm

    return [S1, S2]
%timeit -n 1000 _mie_S1_S2_opt(m, x, mu)
On my machine with 6 cores, the final optimized implementation is 12 times faster with fastmath=True and 8.8 times faster without. Note that using similar strategies in the other functions may also help to speed them up.
So I have a function in Numba that is not compiling for some reason (it calculates the area under the ROC curve). I am also not sure how I would debug the function, because my debugger does not work when I set a breakpoint inside the Numba-decorated function.
Here is the function:
import numpy as np
import numba as nb

@nb.njit()
def auc_numba(fcst, obs):
    L = obs.size
    i_ord = fcst.argsort()

    sumV = 0.
    sumV2 = 0.
    sumW = 0.
    sumW2 = 0.
    n = 0
    m = 0
    i = 0

    while True:
        nn = mm = 0
        while True:
            j = i_ord[i]
            if obs[j]:
                mm += 1
            else:
                nn += 1
            if i == L - 1:
                break
            jp1 = i_ord[i + 1]
            if fcst[j] != fcst[jp1]:
                break
            i += 1
        sumW += nn * (m + mm / 2.0)
        sumW2 += nn * (m + mm / 2.0) * (m + mm / 2.0)
        sumV += mm * (n + nn / 2.0)
        sumV2 += mm * (n + nn / 2.0) * (n + nn / 2.0)
        n += nn
        m += mm
        i += 1
        if i >= L:
            break

    theta = sumV / (m * n)
    v = sumV2 / ((m - 1) * n * n) - sumV * sumV / (m * (m - 1) * n * n)
    w = sumW2 / ((n - 1) * m * m) - sumW * sumW / (n * (n - 1) * m * m)
    sd_auc = np.sqrt(v / m + w / n)

    return np.array([theta, sd_auc])
What I am thinking is that there is something wrong with the while loops that I have implemented. It's possible that the types are wrong, hence the break is not being activated and the function is running forever.
Here is some sample data to test:
obs = np.array([1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1])
fcst = np.array([0.7083333, 0.5416667, 0.875, 0.5833333, 0.2083333, 0.8333333, 0.1666667, 0.9583333, 0.625, 0.1666667, 0.5, 1.0, 0.6666667, 0.2083333, 0.875, 0.75, 0.625, 0.3333333, 0.8333333, 0.2083333, 0.125, 0.0, 0.875, 0.8333333, 0.125, 0.5416667, 0.75])
When I run this without the decorator I get [0.89488636 0.06561209] which are the correct values.
So I guess I'd just like some help understanding why it is not compiling, and maybe some tips on how to debug in Numba?
There is something strange going on with the double while True loop. For whatever reason (and I don't understand it), if you create two variables x and y at the top and then:
x = 1
y = 0
while True:
    nn = mm = 0
    while x > y:
and keep everything else the same, the code works. I'm going to submit an issue to the Numba tracker since this seems like a bug to me.
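For reference, here is what that workaround looks like folded back into the function (a sketch that keeps everything else the same; auc_numba_workaround is just a name I picked for the modified copy):

import numpy as np
import numba as nb

@nb.njit()
def auc_numba_workaround(fcst, obs):
    L = obs.size
    i_ord = fcst.argsort()

    sumV = 0.
    sumV2 = 0.
    sumW = 0.
    sumW2 = 0.
    n = 0
    m = 0
    i = 0

    # dummy variables whose only purpose is to avoid the nested `while True`
    x = 1
    y = 0

    while True:
        nn = mm = 0
        while x > y:  # always true, but compiles where `while True` did not
            j = i_ord[i]
            if obs[j]:
                mm += 1
            else:
                nn += 1
            if i == L - 1:
                break
            jp1 = i_ord[i + 1]
            if fcst[j] != fcst[jp1]:
                break
            i += 1
        sumW += nn * (m + mm / 2.0)
        sumW2 += nn * (m + mm / 2.0) * (m + mm / 2.0)
        sumV += mm * (n + nn / 2.0)
        sumV2 += mm * (n + nn / 2.0) * (n + nn / 2.0)
        n += nn
        m += mm
        i += 1
        if i >= L:
            break

    theta = sumV / (m * n)
    v = sumV2 / ((m - 1) * n * n) - sumV * sumV / (m * (m - 1) * n * n)
    w = sumW2 / ((n - 1) * m * m) - sumW * sumW / (n * (n - 1) * m * m)
    sd_auc = np.sqrt(v / m + w / n)

    return np.array([theta, sd_auc])

# With the sample data from the question this is expected to match the
# non-jitted result, i.e. roughly [0.89488636, 0.06561209].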
Update: The numba issue can be found here
I'm trying to implement the DOSNES algorithm from this publication, but in Python, for a project. I found this Matlab implementation, which works well, but I probably mistranslated one or more steps in my code (mainly with the axes, I guess) because I clearly don't reach the same result. This is the part I'm struggling with in Matlab:
P(1:n + 1:end) = 0;                                 % set diagonal to zero
P = 0.5 * (P + P');                                 % symmetrize P-values
P = max(P ./ sum(P(:)), realmin);                   % make sure P-values sum to one
const = sum(P(:) .* log(P(:)));                     % constant in KL divergence

ydata = .0001 * randn(n, no_dims);
y_incs = zeros(size(ydata));
gains = ones(size(ydata));

% Run the iterations
for iter=1:max_iter

    % Compute joint probability that point i and j are neighbors
    sum_ydata = sum(ydata .^ 2, 2);
    num = 1 ./ (1 + bsxfun(@plus, sum_ydata, bsxfun(@plus, sum_ydata', -2 * (ydata * ydata')))); % Student-t distribution
    num(1:n+1:end) = 0;                             % set diagonal to zero
    Q = max(num ./ sum(num(:)), realmin);           % normalize to get probabilities

    % Compute the gradients (faster implementation)
    L = (P - Q) .* num;
    y_grads = 4 * (diag(sum(L, 1)) - L) * ydata;

    % Update the solution
    gains = (gains + .2) .* (sign(y_grads) ~= sign(y_incs)) ...   % note that the y_grads are actually -y_grads
          + (gains * .8) .* (sign(y_grads) == sign(y_incs));
    gains(gains < min_gain) = min_gain;
    y_incs = momentum * y_incs - epsilon * (gains .* y_grads);
    ydata = ydata + y_incs;

    % Spherical projection
    ydata = bsxfun(@minus, ydata, mean(ydata, 1));
    r_mean = mean(sqrt(sum(ydata.^2,2)),1);
    ydata = bsxfun(@times, ydata, r_mean ./ sqrt(sum(ydata.^2,2)));

    % Update the momentum if necessary
    if iter == mom_switch_iter
        momentum = final_momentum;
    end

    % Print out progress
    if ~rem(iter, 10)
        cost = const - sum(P(:) .* log(Q(:)));
        disp(['Iteration ' num2str(iter) ': error is ' num2str(cost)]);
    end
end
and this is my Python version:
no_dims = 3
n = X.shape[0]
min_gain = 0.01
momentum = 0.5
final_momentum = 0.8
epsilon = 500
mom_switch_iter = 250
max_iter = 1000

P[np.diag_indices_from(P)] = 0.
P = (P + P.T) / 2
P = np.max(P / np.sum(P), axis=0)
const = np.sum(P * np.log(P))

ydata = 1e-4 * np.random.random(size=(n, no_dims))
y_incs = np.zeros(shape=ydata.shape)
gains = np.ones(shape=ydata.shape)

for iter in range(max_iter):
    sum_ydata = np.sum(ydata**2, axis=1)

    bsxfun_1 = sum_ydata.T + -2 * np.dot(ydata, ydata.T)
    bsxfun_2 = sum_ydata + bsxfun_1
    num = 1. / (1 + bsxfun_2)

    num[np.diag_indices_from(num)] = 0.
    Q = np.max(num / np.sum(num), axis=0)

    L = (P - Q) * num
    t = np.diag(L.sum(axis=0)) - L
    y_grads = 4 * np.dot(t, ydata)

    gains = (gains + 0.2) * (np.sign(y_grads) != np.sign(y_incs)) \
          + (gains * 0.8) * (np.sign(y_grads) == np.sign(y_incs))
    # gains[np.where(np.sign(y_grads) != np.sign(y_incs))] += 0.2
    # gains[np.where(np.sign(y_grads) == np.sign(y_incs))] *= 0.8
    gains = np.clip(gains, a_min=min_gain, a_max=None)

    y_incs = momentum * y_incs - epsilon * gains * y_grads
    ydata += y_incs

    ydata -= ydata.mean(axis=0)
    alpha = np.sqrt(np.sum(ydata ** 2, axis=1))
    r_mean = np.mean(alpha)
    ydata = ydata * (r_mean / alpha).reshape(-1, 1)

    if iter == mom_switch_iter:
        momentum = final_momentum

    if iter % 10 == 0:
        cost = const - np.sum(P * np.log(Q))
        print("Iteration {} : error is {}".format(iter, cost))
If you want to do trials, you can download the repository here, which uses the Iris dataset and an attached library. test.py is my test implementation with the Iris dataset, and visu.py is the result the paper has on the MNIST dataset but restricted to 1000k random points.
Many thanks for your support,
Nicolas
EDIT 1
This is the final code, working as expected:
from scipy.special import xlogy

P[np.diag_indices_from(P)] = 0.
P = (P + P.T) / 2
P = P / np.sum(P)
const = np.sum(xlogy(P, P))

ydata = 1e-4 * np.random.random(size=(n, no_dims))
y_incs = np.zeros(shape=ydata.shape)
gains = np.ones(shape=ydata.shape)

for iter in range(max_iter):
    sum_ydata = np.sum(ydata**2, axis=1)

    bsxfun_1 = sum_ydata.T + -2 * np.dot(ydata, ydata.T)
    bsxfun_2 = sum_ydata + bsxfun_1
    num = 1. / (1 + bsxfun_2)

    num[np.diag_indices_from(num)] = 0.
    Q = num / np.sum(num)

    L = (P - Q) * num
    t = np.diag(L.sum(axis=0)) - L
    y_grads = 4 * np.dot(t, ydata)

    gains = (gains + 0.2) * (np.sign(y_grads) != np.sign(y_incs)) \
          + (gains * 0.8) * (np.sign(y_grads) == np.sign(y_incs))
    gains = np.clip(gains, a_min=min_gain, a_max=None)

    y_incs = momentum * y_incs - epsilon * gains * y_grads
    ydata += y_incs

    ydata -= ydata.mean(axis=0)
    alpha = np.sqrt(np.sum(ydata ** 2, axis=1))
    r_mean = np.mean(alpha)
    ydata = ydata * (r_mean / alpha).reshape(-1, 1)

    if iter == mom_switch_iter:
        momentum = final_momentum

    if iter % 10 == 0:
        cost = const - np.sum(xlogy(P, Q))
        print("Iteration {} : error is {}".format(iter, cost))
Right at the beginning you seem to replace a non-reducing max in Matlab (it has two arguments, so it compares them elementwise and returns a full-size P) with a reducing max in Python (axis=0 reduces along that axis, meaning the result has one dimension less).
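For completeness, the elementwise equivalent of that Matlab line would use np.maximum, with np.finfo(float).tiny playing the role of Matlab's realmin:

P = np.maximum(P / np.sum(P), np.finfo(float).tiny)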
My advice, however, is to leave out the max altogether, because it looks pretty much like an amateurish attempt at sidestepping the problem of p log p being defined at 0 only via taking the limit p->0 (which, by L'Hopital's rule, can be shown to be 0), whereas the computer will return NaN when asked to compute 0 * log(0).
The proper way of going about this is using scipy.special.xlogy which treats 0 correctly.
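For example:

import numpy as np
from scipy.special import xlogy

p = np.array([0.0, 0.5])
print(p * np.log(p))  # [nan, -0.347...]  -- 0 * log(0) gives NaN (plus a runtime warning)
print(xlogy(p, p))    # [0.0, -0.347...]  -- xlogy returns 0 for the 0 entry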
Are there any pure-python implementations of the inverse error function?
I know that SciPy has scipy.special.erfinv(), but that relies on some C extensions. I'd like a pure python implementation.
I've tried writing my own using the Wikipedia and Wolfram references, but it always seems to diverge from the true value when the arg is > 0.9.
I've also attempted to port the underlying C code that Scipy uses (ndtri.c and the cephes polevl.c functions) but that's also not passing my unit tests.
Edit: As requested, I've added the ported code.
Docstrings (and doctests) have been removed because they're longer than the functions. I haven't yet put much effort into making the port more pythonic - I'll worry about that once I get something that passes unit tests.
Supporting functions from cephes polevl.c
def polevl(x, coefs, N):
    ans = 0
    power = len(coefs) - 1
    for coef in coefs[:N]:
        ans += coef * x**power
        power -= 1
    return ans
def p1evl(x, coefs, N):
    return polevl(x, [1] + coefs, N)
Main Inverse Error Function
import math

def inv_erf(z):
    if z < -1 or z > 1:
        raise ValueError("`z` must be between -1 and 1 inclusive")
    if z == 0:
        return 0
    if z == 1:
        return math.inf
    if z == -1:
        return -math.inf

    # From scipy special/cephes/ndtri.c
    def ndtri(y):
        # approximation for 0 <= abs(z - 0.5) <= 3/8
        P0 = [
            -5.99633501014107895267E1,
            9.80010754185999661536E1,
            -5.66762857469070293439E1,
            1.39312609387279679503E1,
            -1.23916583867381258016E0,
        ]

        Q0 = [
            1.95448858338141759834E0,
            4.67627912898881538453E0,
            8.63602421390890590575E1,
            -2.25462687854119370527E2,
            2.00260212380060660359E2,
            -8.20372256168333339912E1,
            1.59056225126211695515E1,
            -1.18331621121330003142E0,
        ]

        # Approximation for interval z = sqrt(-2 log y ) between 2 and 8
        # i.e., y between exp(-2) = .135 and exp(-32) = 1.27e-14.
        P1 = [
            4.05544892305962419923E0,
            3.15251094599893866154E1,
            5.71628192246421288162E1,
            4.40805073893200834700E1,
            1.46849561928858024014E1,
            2.18663306850790267539E0,
            -1.40256079171354495875E-1,
            -3.50424626827848203418E-2,
            -8.57456785154685413611E-4,
        ]

        Q1 = [
            1.57799883256466749731E1,
            4.53907635128879210584E1,
            4.13172038254672030440E1,
            1.50425385692907503408E1,
            2.50464946208309415979E0,
            -1.42182922854787788574E-1,
            -3.80806407691578277194E-2,
            -9.33259480895457427372E-4,
        ]

        # Approximation for interval z = sqrt(-2 log y ) between 8 and 64
        # i.e., y between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.
        P2 = [
            3.23774891776946035970E0,
            6.91522889068984211695E0,
            3.93881025292474443415E0,
            1.33303460815807542389E0,
            2.01485389549179081538E-1,
            1.23716634817820021358E-2,
            3.01581553508235416007E-4,
            2.65806974686737550832E-6,
            6.23974539184983293730E-9,
        ]

        Q2 = [
            6.02427039364742014255E0,
            3.67983563856160859403E0,
            1.37702099489081330271E0,
            2.16236993594496635890E-1,
            1.34204006088543189037E-2,
            3.28014464682127739104E-4,
            2.89247864745380683936E-6,
            6.79019408009981274425E-9,
        ]

        s2pi = 2.50662827463100050242
        code = 1

        if y > (1.0 - 0.13533528323661269189):  # 0.135... = exp(-2)
            y = 1.0 - y
            code = 0

        if y > 0.13533528323661269189:
            y = y - 0.5
            y2 = y * y
            x = y + y * (y2 * polevl(y2, P0, 4) / p1evl(y2, Q0, 8))
            x = x * s2pi
            return x

        x = math.sqrt(-2.0 * math.log(y))
        x0 = x - math.log(x) / x

        z = 1.0 / x
        if x < 8.0:  # y > exp(-32) = 1.2664165549e-14
            x1 = z * polevl(z, P1, 8) / p1evl(z, Q1, 8)
        else:
            x1 = z * polevl(z, P2, 8) / p1evl(z, Q2, 8)

        x = x0 - x1
        if code != 0:
            x = -x
        return x

    result = ndtri((z + 1) / 2.0) / math.sqrt(2)

    return result
I think the error in your code is in the for loop over the coefficients in the polevl function: power starts at len(coefs) - 1, but the loop only runs over coefs[:N], so when N is smaller than len(coefs) the trailing coefficients are silently dropped. If you replace what you have with the function below, everything seems to work.
def polevl(x, coefs, N):
    ans = 0
    power = len(coefs) - 1
    for coef in coefs:
        ans += coef * x**power
        power -= 1
    return ans
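For what it's worth, this uses the same coefficient ordering as numpy.polyval (highest power first), which gives a quick cross-check:

import numpy as np

coefs = [2.0, -3.0, 0.5]  # 2*x**2 - 3*x + 0.5
assert np.isclose(polevl(1.7, coefs, len(coefs)), np.polyval(coefs, 1.7))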
I have tested it against scipy's implementation with the following code:
import numpy as np
from scipy.special import erfinv
N = 100000
x = np.random.rand(N) - 1.
# Calculate the inverse of the error function
y = np.zeros(N)
for i in range(N):
    y[i] = inv_erf(x[i])
assert np.allclose(y, erfinv(x))
SymPy? Some digging may be needed to see how it is implemented internally: http://docs.sympy.org/latest/modules/functions/special.html#sympy.functions.special.error_functions.erfinv
from sympy import erfinv

erfinv(0.9).evalf(30)
# 1.16308715367667425688580351562