Numpy - vectorize the bivariate Poisson pmf equation - python

I'm trying to write a function to evaluate the probability mass function for the bivariate poisson distribution.
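For reference, the pmf being computed is the standard bivariate Poisson one (the reference implementation further down follows it term for term):

P(X = x, Y = y) = e^{-(\theta_1 + \theta_2 + \theta_0)} \, \frac{\theta_1^x}{x!} \, \frac{\theta_2^y}{y!} \, \sum_{i=0}^{\min(x, y)} \binom{x}{i} \binom{y}{i} \, i! \left( \frac{\theta_0}{\theta_1 \theta_2} \right)^i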
This is easy when all of the parameters (x, y, theta1, theta2, theta0) are scalars, but tricky to scale up without loops to allow these parameters to be vectors. I need it to scale such that, for:
theta0 being a scalar - the "correlation parameter" in the equation
theta1 and theta2 having length l
x, y both having length n
the output array would have shape (l, n, n). For example, the slice [j, :, :] of the output holds the pmf evaluated at every (x, y) pair for the j-th pair of (theta1, theta2) values.
The first part (the constant, before the summation) I think I've figured out:
import numpy as np
from scipy.special import factorial

def constant(theta1, theta2, theta0, x, y):
    exponential_part = np.exp(-(theta1 + theta2 + theta0)).reshape(-1, 1, 1)
    x = np.tile(x, (len(x), 1)).transpose()
    y = np.tile(y, (len(y), 1))
    double_factorial = (np.power(np.array(theta1).reshape(-1, 1, 1), x)/factorial(x)) * \
                       (np.power(np.array(theta2).reshape(-1, 1, 1), y)/factorial(y))
    return exponential_part * double_factorial
But I'm struggling with the summation part. How can I vectorize a summation where the limits depend on variable arrays?

I think I have this figured out, based on the approach that @w-m suggests: calculate every possible summation term which could appear, based on the maximum x or y value which appears, and use a mask to get rid of the ones you don't want. Assuming your x and y values run from 0 to N in consecutive order, this calculates up to three times more terms than are actually required, but that cost is more than offset by getting to use vectorization.
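To make the masking idea concrete before the full implementation below, here is a toy sketch, where the summand (i + 1) is just a stand-in for the real Poisson term:

import numpy as np

x = np.array([0, 1, 2, 3])
limits = np.minimum(x.reshape(-1, 1), x.reshape(1, -1))  # min(x, y) for every pair, shape (4, 4)
i = np.arange(limits.max() + 1).reshape(-1, 1, 1)        # every i that could appear, shape (4, 1, 1)
mask = i <= limits                                       # which i actually belong in each sum
terms = (i + 1) * mask                                   # stand-in summand, masked
result = terms.sum(axis=0)                               # sum over the i axis
# result[j, k] == sum_{i=0}^{min(x[j], x[k])} (i + 1)
print(result)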
Reference implementation
I wrote this by first writing a pure-Python reference implementation, which just implements your problem using loops. With 4 nested loops, it's not exactly fast, but it's handy to have while testing the numpy version.
import numpy as np
from scipy.special import factorial, comb
import operator as op
from functools import reduce

def choose(n, r):
    # https://stackoverflow.com/a/4941932/530160
    r = min(r, n-r)
    numer = reduce(op.mul, range(n, n-r, -1), 1)
    denom = reduce(op.mul, range(1, r+1), 1)
    return numer // denom  # or / in Python 2

def reference_impl_constant(s_theta1, s_theta2, s_theta0, s_x, s_y):
    # Cast to float to prevent overflow
    s_theta1 = float(s_theta1)
    s_theta2 = float(s_theta2)
    s_theta0 = float(s_theta0)
    s_x = float(s_x)
    s_y = float(s_y)
    term1 = np.exp(-(s_theta1 + s_theta2 + s_theta0))
    term2 = (s_theta1 ** s_x / factorial(s_x))
    term3 = (s_theta2 ** s_y / factorial(s_y))
    assert term1 >= 0
    assert term2 >= 0
    assert term3 >= 0
    return term1 * term2 * term3

def reference_impl_constant_loop(theta1, theta2, theta0, x, y):
    theta_len = theta1.shape[0]
    xy_len = x.shape[0]
    constant_array = np.zeros((theta_len, xy_len, xy_len))
    for i in range(theta_len):
        for j in range(xy_len):
            for k in range(xy_len):
                s_theta1 = theta1[i]
                s_theta2 = theta2[i]
                s_theta0 = theta0
                s_x = x[j]
                s_y = y[k]
                constant_term = reference_impl_constant(s_theta1, s_theta2, s_theta0, s_x, s_y)
                assert constant_term >= 0
                constant_array[i, j, k] = constant_term
    return constant_array

def reference_impl_summation(s_theta1, s_theta2, s_theta0, s_x, s_y):
    sum_ = 0
    for i in range(min(s_x, s_y) + 1):
        sum_ += choose(s_x, i) * choose(s_y, i) * factorial(i) * ((s_theta0/s_theta1/s_theta2) ** i)
    assert sum_ >= 0
    return sum_

def reference_impl_summation_loop(theta1, theta2, theta0, x, y):
    theta_len = theta1.shape[0]
    xy_len = x.shape[0]
    summation_array = np.zeros((theta_len, xy_len, xy_len))
    for i in range(theta_len):
        for j in range(xy_len):
            for k in range(xy_len):
                s_theta1 = theta1[i]
                s_theta2 = theta2[i]
                s_theta0 = theta0
                s_x = x[j]
                s_y = y[k]
                summation_term = reference_impl_summation(s_theta1, s_theta2, s_theta0, s_x, s_y)
                assert summation_term >= 0
                summation_array[i, j, k] = summation_term
    return summation_array

def reference_impl(theta1, theta2, theta0, x, y):
    # all array inputs must be 1D
    assert len(theta1.shape) == 1
    assert len(theta2.shape) == 1
    assert len(x.shape) == 1
    assert len(y.shape) == 1
    # theta vectors must have same length
    theta_len = theta1.shape[0]
    assert theta2.shape[0] == theta_len
    # x and y must have same length
    xy_len = x.shape[0]
    assert y.shape[0] == xy_len
    # theta0 is scalar
    assert isinstance(theta0, (int, float))
    constant_array = reference_impl_constant_loop(theta1, theta2, theta0, x, y)
    summation_array = reference_impl_summation_loop(theta1, theta2, theta0, x, y)
    output = constant_array * summation_array
    return output
Numpy implementation
I split the implementation of this across two functions.
The fast_constant() function calculates everything to the left of the summation symbol. The fast_summation() function calculates everything inside the summation symbol.
import numpy as np
from scipy.special import factorial, comb

def fast_summation(theta1, theta2, theta0, x, y):
    x = np.tile(x, (len(x), 1)).transpose()
    y = np.tile(y, (len(y), 1))
    sum_limit = np.minimum(x, y)
    max_sum_limit = np.max(sum_limit)
    i = np.arange(max_sum_limit + 1).reshape(-1, 1, 1)
    summation_mask = (i <= sum_limit)
    theta_ratio = (theta0 / (theta1 * theta2)).reshape(-1, 1, 1, 1)
    theta_to_power = np.power(theta_ratio, i)
    terms = comb(x, i) * comb(y, i) * factorial(i) * theta_to_power
    # mask out terms which aren't part of the sum
    terms *= summation_mask
    # axis 0 is theta
    # axis 1 is i
    # axes 2 & 3 are x and y
    # so sum across axis 1
    terms = terms.sum(axis=1)
    return terms

def fast_constant(theta1, theta2, theta0, x, y):
    theta1 = theta1.astype('float64')
    theta2 = theta2.astype('float64')
    exponential_part = np.exp(-(theta1 + theta2 + theta0)).reshape(-1, 1, 1)
    # x and y must be 1D
    assert len(x.shape) == 1
    assert len(y.shape) == 1
    # x and y must have same shape
    assert x.shape == y.shape
    x_len, y_len = x.shape[0], y.shape[0]
    x = x.reshape((x_len, 1))
    y = y.reshape((1, y_len))
    double_factorial = (np.power(np.array(theta1).reshape(-1, 1, 1), x)/factorial(x)) * \
                       (np.power(np.array(theta2).reshape(-1, 1, 1), y)/factorial(y))
    return exponential_part * double_factorial

def fast_impl(theta1, theta2, theta0, x, y):
    return fast_summation(theta1, theta2, theta0, x, y) * fast_constant(theta1, theta2, theta0, x, y)
Benchmarking
Assuming that X and Y range from 0 to 20, and that theta is centered somewhere inside that range, I get the result that the numpy version is roughly 280 times faster than the pure python reference.
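A minimal harness along these lines reproduces that comparison (the theta values and ranges here are arbitrary test choices; timing is left to timeit):

theta1 = np.random.uniform(5, 15, size=10)
theta2 = np.random.uniform(5, 15, size=10)
theta0 = 1.0
x = np.arange(21)
y = np.arange(21)
# the two implementations should agree before timing them
assert np.allclose(reference_impl(theta1, theta2, theta0, x, y),
                   fast_impl(theta1, theta2, theta0, x, y))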
Numerical stability
I'm unsure how numerically stable this is. For example, when I center theta at 100, I get a floating-point overflow. Typically, when computing an expression which has lots of choose and factorial expressions inside it, you'll use some mathematical equivalent which results in smaller intermediate sums. In this case I have so little understanding of the math that I don't know how you'd do that.
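That said, one common general-purpose remedy is to do the whole computation in log-space with scipy.special.gammaln and logsumexp, exponentiating only at the very end. A scalar sketch of the idea (not vectorized across the theta/x/y axes):

import numpy as np
from scipy.special import gammaln, logsumexp

def log_pmf_scalar(theta1, theta2, theta0, x, y):
    # log of the constant prefactor
    log_const = (-(theta1 + theta2 + theta0)
                 + x * np.log(theta1) - gammaln(x + 1)
                 + y * np.log(theta2) - gammaln(y + 1))
    i = np.arange(min(x, y) + 1)
    # log of choose(x, i) * choose(y, i) * i! * (theta0/(theta1*theta2))**i
    log_terms = (gammaln(x + 1) - gammaln(i + 1) - gammaln(x - i + 1)
                 + gammaln(y + 1) - gammaln(i + 1) - gammaln(y - i + 1)
                 + gammaln(i + 1)
                 + i * np.log(theta0 / (theta1 * theta2)))
    # np.exp(...) of the result recovers the pmf without large intermediates
    return log_const + logsumexp(log_terms)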

Related

Full algorithm (math) of natural cubic splines computation in Python?

I'm interested in full Python code (with math formulas) with all computations needed to calculate natural Cubic Splines from scratch. If possible, fast (e.g. Numpy-based).
I created this question only to share, as an answer, code that I recently programmed from scratch (based on Wikipedia) while learning cubic splines.
I wrote the following code based on the Russian Wikipedia article; as far as I can see, almost the same description and formulas appear in the English article.
To speed up computation I used both NumPy and Numba.
To check the correctness of the code, I compared against the reference natural cubic spline implementation, scipy.interpolate.CubicSpline; the np.allclose(...) assertion in my code confirms that my formulas are correct.
Also, I did timings:
calc (spline_scipy): Timed best=2.712 ms, mean=2.792 +- 0.1 ms
calc (spline_numba): Timed best=916.000 us, mean=938.868 +- 17.9 us
speedup: 2.973
use (spline_scipy): Timed best=5.262 ms, mean=5.320 +- 0.1 ms
use (spline_numba): Timed best=4.745 ms, mean=5.420 +- 0.3 ms
speedup: 0.981
which shows that my spline-parameter computation is around 3x faster than the SciPy version, while spline evaluation (computing values for a given x) runs at the same speed as SciPy.
Running the code below requires a one-time installation of the following packages: python -m pip install numpy numba scipy timerit. Here scipy and timerit are only needed for testing purposes and are not needed for the actual algorithm.
The code draws plots showing the original multi-line function and the spline approximations for both the SciPy and Numba versions; as one can see, the SciPy and Numba lines coincide (meaning that the spline computations agree):
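For reference, these are the standard natural cubic spline relations that the code implements (in the usual Wikipedia notation; note that my code anchors each piece at its right knot, so its b and d expressions carry the corresponding index shift):

S_i(x) = a_i + b_i (x - x_i) + c_i (x - x_i)^2 + d_i (x - x_i)^3, \quad x \in [x_i, x_{i+1}], \quad h_i = x_{i+1} - x_i

h_{i-1} c_{i-1} + 2 (h_{i-1} + h_i) c_i + h_i c_{i+1} = 3 \left( \frac{a_{i+1} - a_i}{h_i} - \frac{a_i - a_{i-1}}{h_{i-1}} \right), \quad c_0 = c_n = 0

The tridiagonal system in the second line is what tri_diag_solve solves for the c coefficients.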
Code:
import numpy as np, numba

# Solves linear system given by Tridiagonal Matrix
# Helper for calculating cubic splines
@numba.njit(
    [f'f{ii}[:](f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:])' for ii in (4, 8)],
    cache = True, fastmath = True, inline = 'always')
def tri_diag_solve(A, B, C, F):
    n = B.size
    assert A.ndim == B.ndim == C.ndim == F.ndim == 1 and (
        A.size == B.size == C.size == F.size == n
    ) #, (A.shape, B.shape, C.shape, F.shape)
    Bs, Fs = np.zeros_like(B), np.zeros_like(F)
    Bs[0], Fs[0] = B[0], F[0]
    for i in range(1, n):
        Bs[i] = B[i] - A[i] / Bs[i - 1] * C[i - 1]
        Fs[i] = F[i] - A[i] / Bs[i - 1] * Fs[i - 1]
    x = np.zeros_like(B)
    x[-1] = Fs[-1] / Bs[-1]
    for i in range(n - 2, -1, -1):
        x[i] = (Fs[i] - C[i] * x[i + 1]) / Bs[i]
    return x

# Calculate cubic spline params
@numba.njit(
    # [f'(f{ii}, f{ii}, f{ii}, f{ii})(f{ii}[:], f{ii}[:])' for ii in (4, 8)],
    cache = True, fastmath = True, inline = 'always')
def calc_spline_params(x, y):
    a = y
    h = np.diff(x)
    c = np.concatenate((np.zeros((1,), dtype = y.dtype),
        np.append(tri_diag_solve(h[:-1], (h[:-1] + h[1:]) * 2, h[1:],
            ((a[2:] - a[1:-1]) / h[1:] - (a[1:-1] - a[:-2]) / h[:-1]) * 3), 0)))
    d = np.diff(c) / (3 * h)
    b = (a[1:] - a[:-1]) / h + (2 * c[1:] + c[:-1]) / 3 * h
    return a[1:], b, c[1:], d

# Spline value calculating function, given params and "x"
@numba.njit(
    [f'f{ii}[:](f{ii}[:], i8[:], f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:])' for ii in (4, 8)],
    cache = True, fastmath = True, inline = 'always')
def func_spline(x, ix, x0, a, b, c, d):
    dx = x - x0[1:][ix]
    return a[ix] + (b[ix] + (c[ix] + d[ix] * dx) * dx) * dx

@numba.njit(
    [f'i8[:](f{ii}[:], f{ii}[:], b1)' for ii in (4, 8)],
    cache = True, fastmath = True, inline = 'always')
def searchsorted_merge(a, b, sort_b):
    ix = np.zeros((len(b),), dtype = np.int64)
    if sort_b:
        ib = np.argsort(b)
    pa, pb = 0, 0
    while pb < len(b):
        if pa < len(a) and a[pa] < (b[ib[pb]] if sort_b else b[pb]):
            pa += 1
        else:
            ix[pb] = pa
            pb += 1
    return ix

# Compute piece-wise spline function for "x" out of sorted "x0" points
@numba.njit([f'f{ii}[:](f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:])' for ii in (4, 8)],
    cache = True, fastmath = True, inline = 'always')
def piece_wise_spline(x, x0, a, b, c, d):
    xsh = x.shape
    x = x.ravel()
    #ix = np.searchsorted(x0[1 : -1], x)
    ix = searchsorted_merge(x0[1 : -1], x, False)
    y = func_spline(x, ix, x0, a, b, c, d)
    y = y.reshape(xsh)
    return y

def test():
    import matplotlib.pyplot as plt, scipy.interpolate
    from timerit import Timerit
    Timerit._default_asciimode = True
    np.random.seed(0)
    def f(n):
        x = np.sort(np.random.uniform(0., n / 5 * np.pi, (n,))).astype(np.float64)
        return x, (np.sin(x) * 5 + np.sin(1 + 2.5 * x) * 3 + np.sin(2 + 0.5 * x) * 2).astype(np.float64)
    def spline_numba(x0, y0):
        a, b, c, d = calc_spline_params(x0, y0)
        return lambda x: piece_wise_spline(x, x0, a, b, c, d)
    def spline_scipy(x0, y0):
        f = scipy.interpolate.CubicSpline(x0, y0, bc_type = 'natural')
        return lambda x: f(x)
    def timings():
        x0, y0 = f(10000)
        s, t = {}, []
        gs = [spline_scipy, spline_numba]
        spline_numba(np.copy(x0[::3]), np.copy(y0[::3])) # pre-compile numba
        for g in gs:
            print('calc (', g.__name__, '): ', sep = '', end = '', flush = True)
            tim = Timerit(num = 150, verbose = 1)
            for _ in tim:
                s_ = g(x0, y0)
            s[g.__name__] = s_
            t.append(tim.mean())
            if len(t) >= 2:
                print('speedup:', round(t[-2] / t[-1], 3))
        print()
        x = np.linspace(x0[0], x0[-1], 50000, dtype = np.float64)
        t = []
        s['spline_numba'](np.copy(x[::3])) # pre-compile numba
        for i in range(len(s)):
            print('use (', gs[i].__name__, '): ', sep = '', end = '', flush = True)
            tim = Timerit(num = 100, verbose = 1)
            sg = s[gs[i].__name__]
            for _ in tim:
                sg(x)
            t.append(tim.mean())
            if len(t) >= 2:
                print('speedup:', round(t[-2] / t[-1], 3))
    x0, y0 = f(50)
    timings()
    shift = 3
    x = np.linspace(x0[0], x0[-1], 1000, dtype = np.float64)
    ys = spline_scipy(x0, y0)(x)
    yn = spline_numba(x0, y0)(x)
    assert np.allclose(ys, yn), np.absolute(ys - yn).max()
    plt.plot(x0, y0, label = 'orig')
    plt.plot(x, ys, label = 'spline_scipy')
    plt.plot(x, yn, '-.', label = 'spline_numba')
    plt.legend()
    plt.show()

if __name__ == '__main__':
    test()

Numerical radial derivative of function evaluated on Cartesian grid

I have a radially symmetric function evaluated on a 3D Cartesian grid. How can I numerically calculate the radial derivative of the function?
For a simple example (spherical Gaussian), calculate derivatives df/dx, df/dy and df/dz:
import numpy as np

# Parameters
start = 0
end = 5
n = 20
# Variables
x = np.linspace(start, end, num=n)
y = np.linspace(start, end, num=n)
z = np.linspace(start, end, num=n)
dx = (end - start) / n
dy = (end - start) / n
dz = (end - start) / n
x_grid, y_grid, z_grid = np.meshgrid(x, y, z)
eval_xyz = np.exp(-(x_grid ** 2 + y_grid ** 2 + z_grid ** 2))
# Allocate
df_dx = np.zeros((n, n, n))
df_dy = np.zeros((n, n, n))
df_dz = np.zeros((n, n, n))
# Calculate Cartesian gradient numerically
for x in range(eval_xyz.shape[0] - 1):
    for y in range(eval_xyz.shape[1] - 1):
        for z in range(eval_xyz.shape[2] - 1):
            df_dx[x, y, z] = (eval_xyz[x + 1, y, z] - eval_xyz[x, y, z]) / dx
            df_dy[x, y, z] = (eval_xyz[x, y + 1, z] - eval_xyz[x, y, z]) / dy
            df_dz[x, y, z] = (eval_xyz[x, y, z + 1] - eval_xyz[x, y, z]) / dz
Is it then possible to easily calculate the radial derivative df/dr from the Cartesian derivatives?
The trick is to express the radial derivative as a sum of the Cartesian derivatives, weighted by the angles theta and phi at each point, which can be expressed in Cartesian coordinates as:

\frac{\partial f}{\partial r} = \sin\theta \cos\phi \, \frac{\partial f}{\partial x} + \sin\theta \sin\phi \, \frac{\partial f}{\partial y} + \cos\theta \, \frac{\partial f}{\partial z}, \qquad \theta = \arctan\!\left(\frac{\sqrt{x^2 + y^2}}{z}\right), \quad \phi = \operatorname{atan2}(y, x)
The code therefore becomes:
df_dr = np.zeros((n, n, n))
for i in range(n):
    for j in range(n):
        for k in range(n):
            theta_val = theta(i * dx, j * dy, k * dz)
            phi_val = phi(i * dx, j * dy)
            df_dr[i, j, k] = df_dx[i, j, k] * np.sin(theta_val) * np.cos(phi_val) \
                + df_dy[i, j, k] * np.sin(theta_val) * np.sin(phi_val) \
                + df_dz[i, j, k] * np.cos(theta_val)
where theta and phi are calculated carefully to deal with division by zero:
import math

def theta(x, y, z):
    if x == 0 and y == 0 and z == 0:
        return 0
    elif z == 0:
        return np.pi / 2
    elif x == 0 and y == 0:
        return 0
    else:
        return np.arctan(np.sqrt(x ** 2 + y ** 2) / z)

def phi(x, y):
    if x == 0 and y == 0:
        return 0
    elif x == 0:
        return np.pi / 2
    elif y == 0:
        return 0
    else:
        return math.atan2(y, x)
Your own answer is a step in the right direction, but there are some issues both in the answer and in the code generating the Cartesian derivatives.
These lines have a problem:
x = np.linspace(start, end, num=n)
dx = (end - start) / n
The step size is actually (end-start)/(n-1).
Here:
x_grid, y_grid, z_grid = np.meshgrid(x, y, z)
df_dx[x, y, z] = (eval_xyz[x + 1, y, z] - eval_xyz[x, y, z]) / dx
you fell into the trap of meshgrid's default setting: meshgrid(np.arange(n1), np.arange(n2)) will return arrays in the shape (n2, n1) unless you add the parameter indexing='ij'. Because you have size n in all dimensions, you will not get indexing errors to alert you, but you might spend a lot of time trying to debug why the numbers make no sense.
When you manipulate multidimensional arrays, it's a good idea to set the sizes in different directions to slightly different values, so that you can easily check that the array shapes are what you want them to be.
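A quick demonstration of both points (the deliberately different sizes make the axis swap visible):

a, b = np.arange(3), np.arange(5)
np.meshgrid(a, b)[0].shape                 # (5, 3) -- first two axes swapped
np.meshgrid(a, b, indexing='ij')[0].shape  # (3, 5) -- matches (len(a), len(b))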
Also, you should generally evaluate the derivative as (f[i+1]-f[i-1])/(2*dx), which is correct up to the second order in x.
for x in range(eval_xyz.shape[0] - 1):
    for y in range(eval_xyz.shape[1] - 1):
        for z in range(eval_xyz.shape[2] - 1):
When working with numpy, you should always try to vectorize operations rather than writing out for loops that potentially need to iterate over thousands of elements.
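As a side note, np.gradient already implements exactly this second-order central difference (with one-sided differences at the edges) in vectorized form, assuming the grid was built with indexing='ij' so that axis 0 corresponds to x:

# one call replaces the whole triple loop
df_dx, df_dy, df_dz = np.gradient(eval_xyz, dx, dy, dz)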
Here is code that calculates the Cartesian derivative and then the radial derivative.
import numpy as np

def get_cartesian_gradient(f, xyzsteps):
    """For f shape (nx, ny, nz), return gradient as (3, nx, ny, nz) shape.
    xyzsteps is a (3,) array.
    Note: edge points of the gradient array are set to NaN.
    (Exercise for the reader to implement those).
    """
    fshape = f.shape
    grad = np.full((3,) + fshape, np.nan, dtype=np.float64)
    sl, sm, sr = slice(0, -2), slice(1, -1), slice(2, None)
    # Note: multiplying is faster than dividing.
    grad[0, sm, sm, sm] = (f[sr, sm, sm] - f[sl, sm, sm]) * (0.5/xyzsteps[0])
    grad[1, sm, sm, sm] = (f[sm, sr, sm] - f[sm, sl, sm]) * (0.5/xyzsteps[1])
    grad[2, sm, sm, sm] = (f[sm, sm, sr] - f[sm, sm, sl]) * (0.5/xyzsteps[2])
    return grad

def get_dfdr_from_cartesian(grad, x1s, y1s, z1s):
    """Return df/dr array from gradient(f).
    grad.shape must be (3, nx, ny, nz)
    return shape (nx, ny, nz).
    """
    _, nx, ny, nz = grad.shape
    # we need sin(theta), cos(theta), sin(phi), and cos(phi)
    # rxy: shape (nx, ny, 1)
    rxy = np.sqrt(x1s.reshape(-1, 1, 1)**2 + y1s.reshape(1, -1, 1)**2)
    # r: shape (nx, ny, nz)
    r = np.sqrt(rxy**2 + z1s.reshape(1, 1, -1)**2)
    # change zeros to NaN
    r = np.where(r == 0, np.nan, r)
    rxy = np.where(rxy == 0, np.nan, rxy)
    cos_theta = z1s.reshape(1, 1, -1) / r
    sin_theta = rxy / r
    cos_phi = x1s.reshape(-1, 1, 1) / rxy
    sin_phi = y1s.reshape(1, -1, 1) / rxy
    # and the derivative
    dfdr = (grad[0]*cos_phi + grad[1]*sin_phi)*sin_theta + grad[2]*cos_theta
    return dfdr

x1s = np.linspace(-1, 1, 19)
y1s = np.linspace(-1, 1, 21)
z1s = np.linspace(-1, 1, 23)
xs, ys, zs = np.meshgrid(x1s, y1s, z1s, indexing='ij')
xyzsteps = [x1s[1]-x1s[0], y1s[1]-y1s[0], z1s[1]-z1s[0]]

def func(x, y, z):
    return x**2 + y**2 + z**2

def dfdr_analytical(x, y, z):
    r = np.sqrt(x**2 + y**2 + z**2)
    return 2*r

# grad has shape (3, nx, ny, nz)
grad = get_cartesian_gradient(func(xs, ys, zs), xyzsteps)
dfdr = get_dfdr_from_cartesian(grad, x1s, y1s, z1s)

# test
diff = dfdr - dfdr_analytical(xs, ys, zs)
assert np.nanmax(np.abs(diff)) < 1e-14
Note that I've chosen to return NaN values for points on the z-axis, because df/dr is not defined there unless f(x,y,z) is rotationally symmetric around the z-axis and has df/dr=0 in all directions. This is something that is not guaranteed for an arbitrary dataset.
The reason for replacing zeros in the denominators with np.nan using np.where is that dividing by zero produces warning messages, whereas dividing by NaN does not.
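If you would rather keep the zeros in place, a sketch of an alternative is to suppress those warnings locally with np.errstate:

# silence divide-by-zero and invalid-value warnings for just these lines
with np.errstate(divide='ignore', invalid='ignore'):
    cos_theta = z1s.reshape(1, 1, -1) / r
    sin_theta = rxy / r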

How do I determine a correlation coefficient in Python?

The equation that I'm working with is the Pearson correlation coefficient:

r = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum_i (x_i - \bar{x})^2 \, \sum_i (y_i - \bar{y})^2}}

The description says that x-bar and y-bar are the averages of array 1 and array 2. The minimum coefficient is 0.3.
The reason I'm asking is because I am not too familiar with reading statistical equations, let alone implementing them in Python...
Easiest would be to use scipy.stats.pearsonr:
import numpy as np
from scipy.stats import pearsonr
x = np.random.random(20)
y = np.random.random(20)
print(pearsonr(x, y))
This will give you two values, the correlation and the p-value.
You can implement it yourself like this:
x = np.random.random(20)
y = np.random.random(20)
x_bar = np.mean(x)
y_bar = np.mean(y)
top = np.sum((x - x_bar) * (y - y_bar))
bot = np.sqrt(np.sum(np.power(x - x_bar, 2)) * np.sum(np.power(y - y_bar, 2)))
print(top/bot)
Both give the same result, good luck!
The straightforward implementation using for loops would be:
import math

def correlation(x, y):
    assert len(x) == len(y)
    x_bar = sum(x) / len(x)
    y_bar = sum(y) / len(y)
    var_x = sum((x_i - x_bar)**2 for x_i in x)
    var_y = sum((y_i - y_bar)**2 for y_i in y)
    numerator = sum((x_i - x_bar) * (y_i - y_bar) for x_i, y_i in zip(x, y))
    denominator = math.sqrt(var_x * var_y)
    return numerator / denominator

if __name__ == "__main__":
    x = [...]
    y = [...]
    print(correlation(x, y))
When doing a lot of numeric calculations, one usually uses the numpy module, where this function is already provided:
import numpy as np

if __name__ == "__main__":
    x = np.array([...])
    y = np.array([...])
    print(np.corrcoef(x, y)[0, 1])

How can I use multiple dimensional polynomials with numpy.polynomial?

I'm able to use numpy.polynomial to fit terms to 1D polynomials like f(x) = 1 + x + x^2. How can I fit multidimensional polynomials, like f(x,y) = 1 + x + x^2 + y + yx + y x^2 + y^2 + y^2 x + y^2 x^2? It looks like numpy doesn't support multidimensional polynomials at all: is that the case? In my real application, I have 5 dimensions of input and I am interested in Hermite polynomials. It looks like the polynomials in scipy.special are also only available for one dimension of inputs.
# One dimension of data can be fit
x = np.random.random(100)
y = np.sin(x)
params = np.polynomial.polynomial.polyfit(x, y, 6)
np.polynomial.polynomial.polyval([0, .2, .5, 1.5], params)
array([ -5.01799432e-08, 1.98669317e-01, 4.79425535e-01,
9.97606096e-01])
# When I try two dimensions, it fails.
x = np.random.random((100, 2))
y = np.sin(5 * x[:,0]) + .4 * np.sin(x[:,1])
params = np.polynomial.polynomial.polyvander2d(x, y, [6, 6])
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-13-5409f9a3e632> in <module>()
----> 1 params = np.polynomial.polynomial.polyvander2d(x, y, [6, 6])
/usr/local/lib/python2.7/site-packages/numpy/polynomial/polynomial.pyc in polyvander2d(x, y, deg)
1201 raise ValueError("degrees must be non-negative integers")
1202 degx, degy = ideg
-> 1203 x, y = np.array((x, y), copy=0) + 0.0
1204
1205 vx = polyvander(x, degx)
ValueError: could not broadcast input array from shape (100,2) into shape (100)
I got annoyed that there is no simple function for a 2D polynomial fit of arbitrary degree, so I made my own. Like the other answers, it uses least squares (lstsq) to find the best coefficients.
import numpy as np
from scipy.linalg import lstsq
from scipy.special import binom
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def _get_coeff_idx(coeff):
    idx = np.indices(coeff.shape)
    idx = idx.T.swapaxes(0, 1).reshape((-1, 2))
    return idx

def _scale(x, y):
    # Normalize x and y to avoid huge numbers
    # Mean 0, Variation 1
    offset_x, offset_y = np.mean(x), np.mean(y)
    norm_x, norm_y = np.std(x), np.std(y)
    x = (x - offset_x) / norm_x
    y = (y - offset_y) / norm_y
    return x, y, (norm_x, norm_y), (offset_x, offset_y)

def _unscale(x, y, norm, offset):
    x = x * norm[0] + offset[0]
    y = y * norm[1] + offset[1]
    return x, y

def polyvander2d(x, y, degree):
    A = np.polynomial.polynomial.polyvander2d(x, y, degree)
    return A

def polyscale2d(coeff, scale_x, scale_y, copy=True):
    if copy:
        coeff = np.copy(coeff)
    idx = _get_coeff_idx(coeff)
    for k, (i, j) in enumerate(idx):
        coeff[i, j] /= scale_x ** i * scale_y ** j
    return coeff

def polyshift2d(coeff, offset_x, offset_y, copy=True):
    if copy:
        coeff = np.copy(coeff)
    idx = _get_coeff_idx(coeff)
    # Copy coeff because it changes during the loop
    coeff2 = np.copy(coeff)
    for k, m in idx:
        not_the_same = ~((idx[:, 0] == k) & (idx[:, 1] == m))
        above = (idx[:, 0] >= k) & (idx[:, 1] >= m) & not_the_same
        for i, j in idx[above]:
            b = binom(i, k) * binom(j, m)
            sign = (-1) ** ((i - k) + (j - m))
            offset = offset_x ** (i - k) * offset_y ** (j - m)
            coeff[k, m] += sign * b * coeff2[i, j] * offset
    return coeff

def plot2d(x, y, z, coeff):
    # regular grid covering the domain of the data
    if x.size > 500:
        choice = np.random.choice(x.size, size=500, replace=False)
    else:
        choice = slice(None, None, None)
    x, y, z = x[choice], y[choice], z[choice]
    X, Y = np.meshgrid(
        np.linspace(np.min(x), np.max(x), 20), np.linspace(np.min(y), np.max(y), 20)
    )
    Z = np.polynomial.polynomial.polyval2d(X, Y, coeff)
    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")  # fig.gca(projection="3d") no longer works in recent matplotlib
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2)
    ax.scatter(x, y, z, c="r", s=50)
    plt.xlabel("X")
    plt.ylabel("Y")
    ax.set_zlabel("Z")
    plt.show()

def polyfit2d(x, y, z, degree=1, max_degree=None, scale=True, plot=False):
    """A simple 2D polynomial fit to data x, y, z
    The polynomial can be evaluated with numpy.polynomial.polynomial.polyval2d

    Parameters
    ----------
    x : array[n]
        x coordinates
    y : array[n]
        y coordinates
    z : array[n]
        data values
    degree : {int, 2-tuple}, optional
        degree of the polynomial fit in x and y direction (default: 1)
    max_degree : {int, None}, optional
        if given, the maximum combined degree of the coefficients is limited to this value
    scale : bool, optional
        Whether to scale the input arrays x and y to mean 0 and variance 1, to avoid numerical overflows.
        Especially useful at higher degrees. (default: True)
    plot : bool, optional
        whether to plot the fitted surface and data (slow) (default: False)

    Returns
    -------
    coeff : array[degree+1, degree+1]
        the polynomial coefficients in numpy 2d format, i.e. coeff[i, j] for x**i * y**j
    """
    # Flatten input
    x = np.asarray(x).ravel()
    y = np.asarray(y).ravel()
    z = np.asarray(z).ravel()
    # Remove masked values
    mask = ~(np.ma.getmask(z) | np.ma.getmask(x) | np.ma.getmask(y))
    x, y, z = x[mask].ravel(), y[mask].ravel(), z[mask].ravel()
    # Scale coordinates to smaller values to avoid numerical problems at larger degrees
    if scale:
        x, y, norm, offset = _scale(x, y)
    if np.isscalar(degree):
        degree = (int(degree), int(degree))
    degree = [int(degree[0]), int(degree[1])]
    coeff = np.zeros((degree[0] + 1, degree[1] + 1))
    idx = _get_coeff_idx(coeff)
    # Calculate elements 1, x, y, x*y, x**2, y**2, ...
    A = polyvander2d(x, y, degree)
    # We only want the combinations with maximum COMBINED power
    if max_degree is not None:
        mask = idx[:, 0] + idx[:, 1] <= int(max_degree)
        idx = idx[mask]
        A = A[:, mask]
    # Do the actual least squares fit
    C, *_ = lstsq(A, z)
    # Reorder coefficients into numpy compatible 2d array
    for k, (i, j) in enumerate(idx):
        coeff[i, j] = C[k]
    # Reverse the scaling
    if scale:
        coeff = polyscale2d(coeff, *norm, copy=False)
        coeff = polyshift2d(coeff, *offset, copy=False)
    if plot:
        if scale:
            x, y = _unscale(x, y, norm, offset)
        plot2d(x, y, z, coeff)
    return coeff

if __name__ == "__main__":
    n = 100
    x, y = np.meshgrid(np.arange(n), np.arange(n))
    z = x ** 2 + y ** 2
    c = polyfit2d(x, y, z, degree=2, plot=True)
    print(c)
It doesn't look like polyfit supports fitting multivariate polynomials, but you can do it by hand, with linalg.lstsq. The steps are as follows:
Gather the degrees of monomials x**i * y**j you wish to use in the model. Think carefully about it: your current model already has 9 parameters; if you are going to push to 5 variables, then with the current approach you'll end up with 3**5 = 243 parameters, a sure road to overfitting. Maybe limit to the monomials of *total* degree at most 2 or 3...
Plug the x-points into each monomial; this gives a 1D array. Stack all such arrays as columns of a matrix.
Solve a linear system with aforementioned matrix and with the right-hand side being the target values (I call them z because y is confusing when you also use x, y for two variables).
Here it is:
import numpy as np

x = np.random.random((100, 2))
z = np.sin(5 * x[:, 0]) + .4 * np.sin(x[:, 1])

degrees = [(i, j) for i in range(3) for j in range(3)]  # list of monomials x**i * y**j to use
matrix = np.stack([np.prod(x**d, axis=1) for d in degrees], axis=-1)  # stack monomials like columns
coeff = np.linalg.lstsq(matrix, z, rcond=None)[0]  # lstsq returns some additional info we ignore
print("Coefficients", coeff)  # in the same order as the monomials listed in "degrees"
fit = np.dot(matrix, coeff)
print("Fitted values", fit)
print("Original values", z)
I believe you have misunderstood what polyvander2d does and how it should be used. polyvander2d() returns the pseudo-Vandermonde matrix of degrees deg and sample points (x, y).
Here, y is not the value(s) of the polynomial at point(s) x but rather it is the y-coordinate of the point(s) and x is the x-coordinate. Roughly speaking, the returned array is a set of combinations of (x**i) * (y**j) and x and y are essentially 2D "mesh-grids". Therefore, both x and y must have identical shapes.
Your x and y arrays, however, have different shapes:
>>> x.shape
(100, 2)
>>> y.shape
(100,)
I do not believe numpy has a 5D-polyvander of the form polyvander5D(x, y, z, v, w, deg). Notice, all the variables here are coordinates and not the values of the polynomial p=p(x,y,z,v,w). You, however, seem to be using y (in the 2D case) as f.
It appears that numpy does not have 2D or higher equivalents for the polyfit() function. If your intention is to find the coefficients of the best-fitting polynomial in higher-dimensions, I would suggest that you generalize the approach described here: Equivalent of `polyfit` for a 2D polynomial in Python
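For instance, a sketch of the corrected call on the data from the question (treating x[:, 0] and x[:, 1] as the two coordinates, with the sampled values renamed to z as in the lstsq answer above):

A = np.polynomial.polynomial.polyvander2d(x[:, 0], x[:, 1], [6, 6])  # shape (100, 49)
coeff, *_ = np.linalg.lstsq(A, z, rcond=None)
# reshape to (7, 7) so that coeff[i, j] multiplies x**i * y**j
fit = np.polynomial.polynomial.polyval2d(x[:, 0], x[:, 1], coeff.reshape(7, 7))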
The option isn't there because nobody wants to do that. Combine the polynomials linearly (f(x,y) = 1 + x + y + x^2 + y^2) and solve the system of equations yourself.

Scipy Minimization TNC Working, But Not CG

I'm trying to complete week 4 of the Machine Learning course on Coursera. The assignment uses the MNIST data for multi-class classification.
The dimensions are X (5000,401), y (5000,1), theta (10,401), which start off as arrays. X has 1's inserted as the first feature column.
My cost and gradient functions are below:
import numpy as np
import scipy.optimize as opt

def sigmoid(z):
    g = 1 / (1 + np.exp(-z))
    return g

def lrCostFunction(theta, X, y, my_lambda):
    m = float(len(X))
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    # cost function:
    term1 = np.multiply(-y, np.log(sigmoid(X * theta.T)))
    term2 = np.multiply((1 - y), np.log(1 - sigmoid(X * theta.T)))
    reg = np.power(theta[:, 1:theta.shape[1]], 2)
    J = np.sum(term1 - term2) / m + (my_lambda / (2.0 * m) * np.sum(reg))
    return J

def gradient(theta, X, y, my_lambda):
    m = float(len(X))
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    # gradient:
    error = sigmoid(X * theta.T) - y
    g = (X.T * error / m).T + ((my_lambda / m) * theta)
    g[0, 0] = np.sum(np.multiply(error, X[:, 0])) / m
    return g
Here is my One vs All classification function with the TNC optimization:
def oneVsAll(X, y, num_labels, my_lambda):
    m = X.shape[0]
    n = X.shape[1] - 1  # keep shapes as ints so np.zeros/reshape accept them
    all_theta = np.zeros((num_labels, n + 1))
    for K in range(1, num_labels + 1):
        theta = np.zeros(n + 1)
        y_logical = np.array([1 if j == K else 0 for j in y]).reshape(m, 1)
        opt_theta = opt.minimize(fun=lrCostFunction, x0=theta,
                                 args=(X, y_logical, my_lambda),
                                 method='TNC', jac=gradient).x
        all_theta[K - 1, :] = opt_theta
    return all_theta
When I try to run CG, however, it returns the error at line 8: "shapes (1,401) and (1,401) not aligned: 401 (dim 1) != 1 (dim 0)":
def oneVsAll(X, y, num_labels, my_lambda):
    m = X.shape[0]
    n = X.shape[1] - 1
    all_theta = np.zeros((num_labels, n + 1))
    for K in range(1, num_labels + 1):
        theta = np.zeros(n + 1)
        y_logical = np.array([1 if j == K else 0 for j in y]).reshape(m, 1)
        opt_theta = opt.fmin_cg(f=lrCostFunction, x0=theta,
                                fprime=gradient,
                                args=(X, y_logical, my_lambda))
        all_theta[K - 1, :] = opt_theta
    return all_theta
I saw elsewhere that CG only likes 1-D vectors. If I try to flatten y or reduce its dimension, however, everything else breaks. Is it generally a bad idea to use np.matrix as opposed to using np.dot with arrays? I like being able to easily transpose with matrices.
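I suspect the fix is something along these lines, since fmin_cg apparently wants the gradient returned as a flat 1-D array rather than a (1, 401) matrix (untested sketch):

def gradient_flat(theta, X, y, my_lambda):
    # same computation, but returned as a 1-D ndarray for fmin_cg
    return np.asarray(gradient(theta, X, y, my_lambda)).ravel()

and then passing fprime=gradient_flat instead of fprime=gradient.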
Any help would be greatly appreciated.
