Overflow encountered in square - python

I have tried searching for the overflow error that I'm getting, but I did not succeed.
When I run this program, I get runtime errors that in no way makes any sense to me.
and here is the data i used: https://pastebin.com/MLWvUarm
import numpy as np
def loadData():
data = np.loadtxt('data.txt', delimiter=',')
x = np.c_[data[:,0:2]]
y = np.c_[data[:,-1]]
return x, y
def hypothesis(x, theta):
h = x.dot(theta)
return h
def computeCost(x, y, theta):
m = np.size(y, 0)
h = hypothesis(x, theta)
J = (1/(2*m)) * np.sum(np.square(h-y))
return J
def gradient_descent(x, y, theta, alpha, mxIT):
m = np.size(y, 0)
J_history = np.zeros((mxIT, 1))
for it in range(mxIT):
hyp = hypothesis(x, theta)
err = hyp - y
theta = theta - (alpha/m) * (x.T.dot(err))
J_history[it] = computeCost(x, y, theta)
return theta, J_history
def main():
x, y = loadData()
x = np.c_[np.ones(x.shape[0]), x]
theta = np.zeros((np.size(x, 1), 1))
alpha = 0.01
mxIT = 400
theta, j_his = gradient_descent(x, y, theta, alpha, mxIT)
print(theta)
if __name__ == "__main__":
main()
How do I solve this problem?

After loading x, try to divide it by the mean and see if it converges. Link to documentation for mean: numpy.mean
...
x, y = loadData()
x = x / x.mean(axis=0, keepdims=True)
x = np.c_[np.ones(x.shape[0]), x]
...
Currently it seems to diverge and this produces very high errors which numpy complains about. You can see this from the cost history which you maintain in J_history.

Related

Learning parameters with gradient descent

I just started a ML course and I'm trying to run gradient descent in python. The below functions work fine, but as I move on to the bigger chunk where I do the actual learning, I just can't get the expected output and learn the right parameters, as you can tell from this decision boundary I plotted afterwards. And I'm trying to figure out why.
plotting the decision boundary
def sigmoid(z):
sigma = 1/(1+np.exp(-z))
return sigma
def compute_cost(X, y, w, b):
y_hat = sigmoid((X * np.expand_dims(w, axis=0)).sum(axis=1) + b)
total_cost = (-y * np.log(y_hat) - (1-y) * np.log(1-y_hat)).mean()
return total_cost
def compute_gradient(X, y, w, b):
z = w * X + b
yhat = sigmoid(z)
y1 = np.expand_dims(y, axis=1)
error = yhat - y1
db = error.mean()
dw_j1 = (X * error)
dw_j = np.mean(dw_j1,axis=0)
return dw_j, db
Before building this gradient descent function, I tested all the above with my training data & they all work and output the correct numbers. Really appreciate it if you can spot my mistakes.
Learning parameters with gradient descent
def gradient_descent(X, y, w, b, alpha, num_iters):
m = len(X)
J_history = []
wb_history = []
for i in range(num_iters):
cost = compute_cost(X, y, w, b)
dw_j, db = compute_gradient(X, y, w, b)
w = w - alpha * dw_j
b = b - alpha * db
wb_history.append((w,b))
J_history.append(cost)
if i % math.ceil(num_iters/10) == 0 or i == (num_iters-1):
print(f"Iteration {i:4}: Cost {float(J_history[-1]):8.2f}")
return w, b, J_history, wb_history
np.random.seed(1)
initial_w = 0.01 * (np.random.rand(2) - 0.5)
initial_b = -8
iterations = 10000
alpha = 0.001
w, b, J_history, _ = gradient_descent(X_train ,y_train, initial_w, initial_b, alpha, iterations)

scipy.optimize.fmin_cg function need two callable function f and fprime, how could I extract two function from a function that return both two values?

def linear_regression(theta, X, y, lamb):
# X(12,1+1) theta(2,1) y(12,1)
m = X.shape[0]
ones = np.ones([m, 1])
X = np.hstack([ones, X])
h = X.dot(theta)
# cost function
J = 1 / 2 / m * np.sum(np.power(h - y, 2)) + lamb / 2 / m * np.sum(np.power(theta[1:], 2))
# gradient X(12,2) X.T(2,12) (h-y)(12,1) sum_error(2,1)\
sum_error = 1 / m * X.T.dot(h - y)
temp = theta
temp[0] = 0
gradient = sum_error + lamb / m * temp
return J, gradient
def f(theta, X, y, lamb):
J, gradient = linear_regression(theta, X, y, lamb)
return J
def fprime(theta, X, y, lamb):
J, gradient = linear_regression(theta, X, y, lamb)
return gradient
J, gradient = linear_regression(theta,X,y,1)
# theta need to be a vector not matrix
result = opt.fmin_cg(f, theta, fprime=fprime, args=(X,y,1))
print(result[0])
Explain:
opt.fmin_cg(f, theta, fprime=fprime, args=(X,y,1)) need a callable function f and fprime
f, fprime is the return value of linear_regression(theta, X, y, lamb)
It is easy to compute the cost function and gradient in the same function
Question:
Is there an easy way to extract two callable function from linear_regression(theta, X, y, lamb)
calling J, gradient = linear_regression(theta,X,y,1) and pass to opt.fmin_cg(J, theta, fprime=gradient , args=(X,y,1)) is not work

Linear Regression for multi variable not working as expected

When I use this code for Single variable Linear regression, the theta is being evaluated correctly but when on the multi variable it is giving weird output for theta.
I am trying to convert my octave code, that I wrote when I took Andrew Ng's course.
This is the main calling file:
m = data.shape[0]
a = np.array(data[0])
a.shape = (m,1)
b = np.array(data[1])
b.shape = (m, 1)
x = np.append(a, b, axis=1)
y = np.array(data[2])
lr = LR.LinearRegression()
[X, mu, sigma] = lr.featureNormalize(x)
z = np.ones((m, 1), dtype=float)
X = np.append(z, X, axis=1)
alpha = 0.01
num_iters = 400
theta = np.zeros(shape=(3,1))
[theta, J_history] = lr.gradientDescent(X, y, theta, alpha, num_iters)
print(theta)
And here are the contents of class :
class LinearRegression:
def featureNormalize(self, data):#this normalizes the features
data = np.array(data)
x_norm = data
mu = np.zeros(shape=(1, data.shape[1]))#creates mu vector filled with zeros
sigma = np.zeros(shape=(1, data.shape[1]))
for i in range(0, data.shape[1]):
mu[0, i] = np.mean(data[:, i])
sigma[0, i] = np.std(data[:, i])
for i in range(0, data.shape[1]):
x_norm[:, i] = np.subtract(x_norm[:, i], mu[0, i])
x_norm[:, i] = np.divide(x_norm[:, i], sigma[0, i])
return [x_norm, mu, sigma]
def gradientDescent(self, X, y, theta, alpha, num_iters):
m = y.shape[0]
J_history = np.zeros(shape=(num_iters, 1))
for i in range(0, num_iters):
predictions = X.dot(theta) # X is 47*3 theta is 3*1 predictions is 47*1
theta = np.subtract(theta , (alpha / m) * np.transpose((np.transpose(np.subtract(predictions ,y))).dot(X))) #1*97 into 97*3
J_history[i] = self.computeCost(X, y, theta)
return [theta, J_history]
def computeCost(self, X, y, theta):
warnings.filterwarnings('ignore')
m = X.shape[0]
J = 0
predictions = X.dot(theta)
sqrErrors = np.power(predictions - y, 2)
J = 1 / (2 * m) * np.sum(sqrErrors)
return J
I expected a theta that'll be a 3*1 matrix. According to Andrew's course my octave implementation was producing a theta
334302.063993
100087.116006
3673.548451
But in python implementation I am getting very weird output:
[[384596.12996714 317274.97693463 354878.64955708 223121.53576488
519238.43603216 288423.05420641 302849.01557052 191383.45903309
203886.92061274 233219.70871976 230814.42009498 333720.57288972
317370.18827964 673115.35724932 249953.82390212 432682.6678475
288423.05420641 192249.97844569 480863.45534211 576076.72380674
243221.70859887 245241.34318985 233604.4010228 249953.82390212
551937.2817908 240336.51632605 446723.93690857 451051.7253178
456822.10986344 288423.05420641 336509.59208678 163398.05571747
302849.01557052 557707.6...................... this goes on for long
The same code is working absolutely fine in Single Variable dataset. It is also working fine in the octave but seems like I am missing some point for 2+ hours now. Happy to get your help.
Try in gradientDescent the following second line of the for loop:
theta=theta-(alpha/m)*X.T.dot(X.dot(theta)-y)
Also, if you want to add a column of ones, it is easier to do like so:
np.c_[np.ones((m,1)),data]

Linear Regression with Gradient Descent in Python with numpy

I'm trying to implement in Python the first exercise of Andrew NG's Coursera Machine Learning course. In the course the exercise is with Matlab/Octave, but I wanted to implement it in Python as well.
The problem is that the line that updates theta values, does not seem to be working right, is returning values ​​[[0.72088159] [0.72088159]] but should return [[-3.630291] [1.166362]]
I'm using a learning rate of 0.01 and the gradient loop was set to 1500 (the same values ​​from the original exercise in Octave).
And obviously, with these wrong values ​​for theta, the predictions are not correct as shown in the last chart.
In the rows in which I tesyo the cost function with theta values ​​defined as [0; 0] and [-1; 2], the results are correct (the same as the exercise in Octave), so the error can only be in the function of the gradient, but I do not know what went wrong.
I wanted someone to help me figure out what I'm doing wrong. I'm grateful already.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
def load_data():
X = np.genfromtxt('data.txt', usecols=(0), delimiter=',', dtype=None)
y = np.genfromtxt('data.txt', usecols=(1), delimiter=',', dtype=None)
X = X.reshape(1, X.shape[0])
y = y.reshape(1, y.shape[0])
ones = np.ones(X.shape)
X = np.append(ones, X, axis=0)
theta = np.zeros((2, 1))
return (X, y, theta)
alpha = 0.01
iter_num = 1500
debug_at_loop = 10
def plot(x, y, y_hat=None):
x = x.reshape(x.shape[0], 1)
plt.xlabel('x')
plt.ylabel('hΘ(x)')
plt.ylim(ymax = 25, ymin = -5)
plt.xlim(xmax = 25, xmin = 5)
plt.scatter(x, y)
if type(y_hat) is np.ndarray:
plt.plot(x, y_hat, '-')
plt.show()
plot(X[1], y)
def hip(X, theta):
return np.dot(theta.T, X)
def cost(X, y, theta):
m = y.shape[1]
return np.sum(np.square(hip(X, theta) - y)) / (2 * m)
print('With theta = [0 ; 0]')
print('Cost computed =', cost(X, y, np.array([0, 0])))
print()
print('With theta = [-1 ; 2]')
print('Cost computed =', cost(X, y, np.array([-1, 2])))
def grad(X, y, alpha, theta, iter_num=1500, debug_cost_at_each=10):
J = []
m = y.shape[1]
for i in range(iter_num):
theta -= ((alpha * 1) / m) * np.sum(np.dot(hip(X, theta) - y, X.T))
if i % debug_cost_at_each == 0:
J.append(round(cost(X, y, theta), 6))
return J, theta
X, y, theta = load_data()
J, fit_theta = grad(X, y, alpha, theta)
print('Theta found by Gradient Descent:', fit_theta)
# Predict values for population sizes of 35,000 and 70,000
predict1 = np.dot(np.array([[1], [3.5]]).T, fit_theta);
print('For population = 35,000, we predict a profit of \n', predict1 * 10000);
predict2 = np.dot(np.array([[1], [7]]).T, fit_theta);
print('For population = 70,000, we predict a profit of \n', predict2 * 10000);
pred_y = hip(X, fit_theta)
plot(X[1], y, pred_y.T)
The data I'm using is the following txt:
6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
7.4764,4.3483
8.5781,12
6.4862,6.5987
5.0546,3.8166
5.7107,3.2522
14.164,15.505
5.734,3.1551
8.4084,7.2258
5.6407,0.71618
5.3794,3.5129
6.3654,5.3048
5.1301,0.56077
6.4296,3.6518
7.0708,5.3893
6.1891,3.1386
20.27,21.767
5.4901,4.263
6.3261,5.1875
5.5649,3.0825
18.945,22.638
12.828,13.501
10.957,7.0467
13.176,14.692
22.203,24.147
5.2524,-1.22
6.5894,5.9966
9.2482,12.134
5.8918,1.8495
8.2111,6.5426
7.9334,4.5623
8.0959,4.1164
5.6063,3.3928
12.836,10.117
6.3534,5.4974
5.4069,0.55657
6.8825,3.9115
11.708,5.3854
5.7737,2.4406
7.8247,6.7318
7.0931,1.0463
5.0702,5.1337
5.8014,1.844
11.7,8.0043
5.5416,1.0179
7.5402,6.7504
5.3077,1.8396
7.4239,4.2885
7.6031,4.9981
6.3328,1.4233
6.3589,-1.4211
6.2742,2.4756
5.6397,4.6042
9.3102,3.9624
9.4536,5.4141
8.8254,5.1694
5.1793,-0.74279
21.279,17.929
14.908,12.054
18.959,17.054
7.2182,4.8852
8.2951,5.7442
10.236,7.7754
5.4994,1.0173
20.341,20.992
10.136,6.6799
7.3345,4.0259
6.0062,1.2784
7.2259,3.3411
5.0269,-2.6807
6.5479,0.29678
7.5386,3.8845
5.0365,5.7014
10.274,6.7526
5.1077,2.0576
5.7292,0.47953
5.1884,0.20421
6.3557,0.67861
9.7687,7.5435
6.5159,5.3436
8.5172,4.2415
9.1802,6.7981
6.002,0.92695
5.5204,0.152
5.0594,2.8214
5.7077,1.8451
7.6366,4.2959
5.8707,7.2029
5.3054,1.9869
8.2934,0.14454
13.394,9.0551
5.4369,0.61705
Well, I got it after losing several strands of hair (the programming will still leave me bald).
It was on the gradient line, and the solution was this:
theta -= ((alpha * 1) / m) * np.dot(X, (hip(X, theta) - y).T)
I changed the place of X and transposed the error vector.

Extracting 1D ellipse from 2D image

I've trying to simulate a 2D Sérsic profile and then testing an extraction routine on it. However, when I do a test by extracting all the points lying along an ellipse supposedly aligned with an image, I get a periodic function. It is meant to be a straight line since all points along the ellipse should have equal intensity, although there will be a small amount of deviation due to rounding errors in the rough coordinate estimation (get_I()).
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import NearestNDInterpolator
def rotate(x, y, angle):
x1 = x*np.cos(angle) + y*np.sin(angle)
y1 = y*np.cos(angle) - x*np.sin(angle)
return x1, y1
def sersic_1d(R, mu0, h, n, zp=0):
exponent = (R / h) ** (1 / n)
I0 = np.exp((zp - mu0) / 2.5)
return I0 * np.exp(-1.* exponent)
def sersic_2d(x, y, e, i, mu0, h, n, zp=0):
xp, yp = rotate(x, y, i)
alpha = np.arctan2(yp, xp * (1-e))
a = xp / np.cos(alpha)
b = a * (1 - e)
# R2 = (a*a) + ((1 - (e*e)) * yp*yp)
return sersic_1d(a, mu0, h, n, zp)
def ellipse(x0, y0, a, e, i, theta):
b = a * (1 - e)
x = a * np.cos(theta)
y = b * np.sin(theta)
x, y = rotate(x, y, i)
return x + x0, y + y0
def get_I(x, y, Z):
return Z[np.round(x).astype(int), np.round(y).astype(int)]
if __name__ == '__main__':
n = np.linspace(-100,100,1000)
nx, ny = np.meshgrid(n, n)
Z = sersic_2d(nx, ny, 0.5, 0., 0, 50, 1, 25)
theta = np.linspace(0, 2*np.pi, 1000.)
a = 100.
e = 0.5
i = np.pi / 4.
x, y = ellipse(0, 0, a, e, i, theta)
I = get_I(x, y, Z)
plt.plot(I)
# plt.imshow(Z)
plt.show()
However, What I actually get is a massive periodic function. I've checked the alignment and it's correct and the float-> int rounding errors can't account for this kind of shift?
Any ideas?
There are two things that strike me as odd, one of which for sure is not what you wanted, the other I'm not sure about because astronomy is not my field of expertise.
The first is in your function get_I:
def get_I(x, y, Z):
return Z[np.round(x).astype(int), np.round(y).astype(int)]
When you call that function, x an y outline an ellipse, with its center at the origin (0,0). That means x and y both become negative at some point. The indexing you perfom in that function will then take values from the array's last elements, because Z[0,0] is in fact the top left corner of the image (which you plotted, but commented), while Z[-1, -1] is the bottom right corner. What you want is to take the values of Z that are on the ellipse contour, but both have to have the same center. To do that, you would first make sure you use an uneven amount of samples for n (which ultimately defines the shape of Z) and second, you would add an indexing offset:
def get_I(x, y, Z):
offset = Z.shape[0]//2
return Z[np.round(y).astype(int) + offset, np.round(x).astype(int) + offset]
...
n = np.linspace(-100,100,1001) # changed from 1000 to 1001 to ensure a point of origin is present and that the image exhibits point symmetry
Also notice that I changed the order of y and x in get_I: that's because you first index along the rows (for which we usually take the y-coordinate) and only then along the columns (which map to the x-coordinate in most conventions).
The second item that struck me as unusual is that your ellipse has its axes at an angle of pi/4 with respect to the horizontal axis, whereas your sersic (which maps to the 2D array of Z) does not have a tilt at all.
Changing all that, I end up with this code:
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
def rotate(x, y, angle):
x1 = x*np.cos(angle) + y*np.sin(angle)
y1 = y*np.cos(angle) - x*np.sin(angle)
return x1, y1
def sersic_1d(R, mu0, h, n, zp=0):
exponent = (R / h) ** (1 / n)
I0 = np.exp((zp - mu0) / 2.5)
return I0 * np.exp(-1.* exponent)
def sersic_2d(x, y, e, ang, mu0, h, n, zp=0):
xp, yp = rotate(x, y, ang)
alpha = np.arctan2(yp, xp * (1-e))
a = xp / np.cos(alpha)
b = a * (1 - e)
return sersic_1d(a, mu0, h, n, zp)
def ellipse(x0, y0, a, e, i, theta):
b = a * (1 - e) # half of a
x = a * np.cos(theta)
y = b * np.sin(theta)
x, y = rotate(x, y, i) # rotated by 45deg
return x + x0, y + y0
def get_I(x, y, Z):
offset = Z.shape[0]//2
return Z[np.round(y).astype(int) + offset, np.round(x).astype(int) + offset]
#return Z[np.round(y).astype(int), np.round(x).astype(int)]
if __name__ == '__main__':
n = np.linspace(-100,100,1001) # changed
nx, ny = np.meshgrid(n, n)
ang = 0;#np.pi / 4.
Z = sersic_2d(nx, ny, 0.5, ang=0, mu0=0, h=50, n=1, zp=25)
f, ax = plt.subplots(1,2)
dn = n[1]-n[0]
ax[0].imshow(Z, cmap='gray', aspect='equal', extent=[-100-dn/2, 100+dn/2, -100-dn/2, 100+dn/2])
theta = np.linspace(0, 2*np.pi, 1000.)
a = 20. # decreased long axis of ellipse to see the intensity-map closer to the "center of the galaxy"
e = 0.5
x, y = ellipse(0,0, a, e, ang, theta)
I = get_I(x, y, Z)
ax[0].plot(x,y) # easier to see where you want the intensities
ax[1].plot(I)
plt.show()
and this image:
The intensity variations look like quantisation noise to me, with the exception of the peaks, which are due to the asymptote in sersic_1d.

Categories