CVXPY error: "NotImplementedError: Strict inequalities are not allowed" - python

def PPNM_model(a,E, beta):
p = E.shape[1]
x = E*a
x = sum(x,beta*cp.square(x))
return x
def PPNM_model_cvxpy(a,E,beta):
first = E*a
second = beta*cp.square(first)
third = sum(first,second)
return third
def construct_ppnm_model(x_in,A,E, x_LMM, a_lmm):
p = E.shape[1]
d = E.shape[0]
N = A.shape[0]
x = np.zeros(shape=(N,d), dtype=np.float64)
a_ppnm = np.zeros(shape=(N,p), dtype=np.float64)
beta_ppnm = np.zeros(shape=(N,1), dtype=np.float64)
current_lmm = cp.Variable(p)
current_beta = cp.Variable()
b_min = 0
b_max = 100
all_zeros = np.squeeze(np.zeros(shape=(N,1), dtype=np.double))
sum_to_one_vector=np.ones(shape=(1,p),dtype=np.double)
for i in np.arange(0, N):
x_in_temp = x_in[i,:].astype(np.double)
current_lmm.value=a_lmm[i,]
objective =
cp.Minimize(cp.sum_squares((PPNM_model_cvxpy(current_lmm,
E, current_beta)) - x_in_temp))
constraints = [current_lmm >= 0,
current_lmm <= 1,
current_beta >= b_min,
current_beta <= b_max,
sum_to_one_vector*current_lmm == 1]
prob = cp.Problem(objective, constraints)
result = prob.solve()
a_ppnm[i,:]=current_lmm.value
beta_ppnm[i] = current_beta.value
current_vector = PPNM_model(a_ppnm[i,:], E, current_beta.value)
x[i,:]=current_vector
return x, a_ppnm, beta_ppnm
In this problem, matrix A is of shape (10000, 4) which is (total pixels, endmembers), E is of shape (198, 4) :(spectral bands, endmembers)
and x is of shape (10000, 198) :( pixels, spectral bands)
When I call the construct_ppnm_model like this:
x_ppnm, a_ppnm, beta_ppnm = construct_ppnm_model(hsi_2d, A, E, x_LMM, a_lmm)
I get the following error message:
NotImplementedError: Strict inequalities are not allowed.

Related

General minimal residual method with right-preconditioner of SSOR

I am trying to implement the algorithm of GMRES with right-preconditioner P for solving the linear system Ax = b . The code is running without error; however, it pops into unprecise result for me because the error I have is very large. For the GMRES method (without preconditioning matrix - remove P in the algorithm), the error I get is around 1e^{-12} and it converges with the same matrix.
import numpy as np
from scipy import sparse
import matplotlib.pyplot as plt
from scipy.linalg import norm as norm
import scipy.sparse as sp
from scipy.sparse import diags
"""The program is to split the matrix into D-diagonal; L: strictly lower matrix; U strictly upper matrix
satisfying: A = D - L - U """
def splitMat(A):
n,m = A.shape
if (n == m):
diagval = np.diag(A)
D = diags(diagval,0).toarray()
L = (-1)*np.tril(A,-1)
U = (-1)*np.triu(A,1)
else:
print("A needs to be a square matrix")
return (L,D,U)
"""Preconditioned Matrix for symmetric successive over-relaxation (SSOR): """
def P_SSOR(A,w):
## Split up matrix A:
L,D,U = splitMat(A)
Comp1 = (D - w*U)
Comp2 = (D - w*L)
Comp1inv = np.linalg.inv(Comp1)
Comp2inv = np.linalg.inv(Comp2)
P = w*(2-w)*np.matmul(Comp1inv, np.matmul(D,Comp2inv))
return P
"""GMRES_SSOR using right preconditioning P:
A - matrix of linear system Ax = b
x0 - initial guess
tol - tolerance
maxit - maximum iteration """
def myGMRES_SSOR(A,x0, b, tol, maxit):
matrixSize = A.shape[0]
e = np.zeros((maxit+1,1))
rr = 1
rstart = 2
X = x0
w = 1.9 ## in ssor
P = P_SSOR(A,w) ### preconditioned matrix
### Starting the GMRES ####
for rs in range(0,rstart+1):
### first check the residual:
if rr<tol:
break
else:
r0 = (b-A.dot(x0))
rho = norm(r0)
e[0] = rho
H = np.zeros((maxit+1,maxit))
Qcol = np.zeros((matrixSize, maxit+1))
Qcol[:,0:1] = r0/rho
for k in range(1, maxit+1):
### Arnodi procedure ##
Qcol[:,k] =np.matmul(np.matmul(A,P), Qcol[:,k-1]) ### This step applies P here:
for j in range(0,k):
H[j,k-1] = np.dot(np.transpose(Qcol[:,k]),Qcol[:,j])
Qcol[:,k] = Qcol[:,k] - (np.dot(H[j,k-1], Qcol[:,j]))
H[k,k-1] =norm(Qcol[:,k])
Qcol[:,k] = Qcol[:,k]/H[k,k-1]
### QR decomposition step ###
n = k
Q = np.zeros((n+1, n))
R = np.zeros((n, n))
R[0, 0] = norm(H[0:n+2, 0])
Q[:, 0] = H[0:n+1, 0] / R[0,0]
for j in range (0, n+1):
t = H[0:n+1, j-1]
for i in range (0, j-1):
R[i, j-1] = np.dot(Q[:, i], t)
t = t - np.dot(R[i, j-1], Q[:, i])
R[j-1, j-1] = norm(t)
Q[:, j-1] = t / R[j-1, j-1]
g = np.dot(np.transpose(Q), e[0:k+1])
Y = np.dot(np.linalg.inv(R), g)
Res= e[0:n] - np.dot(H[0:n, 0:n], Y[0:n])
rr = norm(Res)
#### second check on the residual ###
if rr < tol:
break
#### Updating the solution with the preconditioned matrix ####
X = X + np.matmul(np.matmul(P,Qcol[:, 0:k]), Y) ### This steps applies P here:
return X
######
A = np.random.rand(100,100)
x = np.random.rand(100,1)
b = np.matmul(A,x)
x0 = np.zeros((100,1))
maxit = 100
tol = 0.00001
x = myGMRES_SSOR(A,x0,b,tol,maxit)
res = b - np.matmul(A,x)
print(norm(res))
print("Solution with gmres\n", np.matmul(A,x))
print("---------------------------------------")
print("b matrix:", b)
I hope anyone could help me figure out this!!!
I'm not sure where you got you "Symmetric_successive_over-relaxation" SSOR code from, but it appears to be wrong. You also seem to be assuming that A is symmetric matrix, but in your random test case it is not.
Following SSOR's Wikipedia entry, I replaced your P_SSOR function with
def P_SSOR(A,w):
L,D,U = splitMat(A)
P = 2/(2-w) * (1/w*D+L)*np.linalg.inv(D)*(1/w*D+L).T
return P
and your test matrix with
A = np.random.rand(100,100)
A = A + A.T
and your code works up to a 12 digit residual error.

Calculate covariance of torch tensor (2d feature map)

I have a torch tensor with shape (batch_size, number_maps, x_val, y_val). The tensor is normalized with a sigmoid function, so within range [0, 1]. I want to find the covariance for each map, so I want to have a tensor with shape (batch_size, number_maps, 2, 2). As far as I know, there is no torch.cov() function as in numpy. How can I efficiently calculate the covariance without converting it to numpy?
Edit:
def get_covariance(tensor):
bn, nk, w, h = tensor.shape
tensor_reshape = tensor.reshape(bn, nk, 2, -1)
x = tensor_reshape[:, :, 0, :]
y = tensor_reshape[:, :, 1, :]
mean_x = torch.mean(x, dim=2).unsqueeze(-1)
mean_y = torch.mean(y, dim=2).unsqueeze(-1)
xx = torch.sum((x - mean_x) * (x - mean_x), dim=2).unsqueeze(-1) / (h*w - 1)
xy = torch.sum((x - mean_x) * (y - mean_y), dim=2).unsqueeze(-1) / (h*w - 1)
yx = xy
yy = torch.sum((y - mean_y) * (y - mean_y), dim=2).unsqueeze(-1) / (h*w - 1)
cov = torch.cat((xx, xy, yx, yy), dim=2)
cov = cov.reshape(bn, nk, 2, 2)
return cov
I tried the following now, but I m pretty sure it's not correct.
You could try the function suggested on Github:
def cov(x, rowvar=False, bias=False, ddof=None, aweights=None):
"""Estimates covariance matrix like numpy.cov"""
# ensure at least 2D
if x.dim() == 1:
x = x.view(-1, 1)
# treat each column as a data point, each row as a variable
if rowvar and x.shape[0] != 1:
x = x.t()
if ddof is None:
if bias == 0:
ddof = 1
else:
ddof = 0
w = aweights
if w is not None:
if not torch.is_tensor(w):
w = torch.tensor(w, dtype=torch.float)
w_sum = torch.sum(w)
avg = torch.sum(x * (w/w_sum)[:,None], 0)
else:
avg = torch.mean(x, 0)
# Determine the normalization
if w is None:
fact = x.shape[0] - ddof
elif ddof == 0:
fact = w_sum
elif aweights is None:
fact = w_sum - ddof
else:
fact = w_sum - ddof * torch.sum(w * w) / w_sum
xm = x.sub(avg.expand_as(x))
if w is None:
X_T = xm.t()
else:
X_T = torch.mm(torch.diag(w), xm).t()
c = torch.mm(X_T, xm)
c = c / fact
return c.squeeze()
https://github.com/pytorch/pytorch/issues/19037

python program is super slow, and I can't find out why

I'm writing a multi-layer perceptron from scratch and I think it's way slower than it should be. the culprit seems to be my compute_gradients-function, which according to my investigation answers for most of the execution time. It looks like this:
def compute_gradients(X, Y, S1, H, P, W1, W2, lamb):
# Y must be one-hot
# Y and P must be (10, n)
# X and H must be (3072, n)
# P is the softmax layer
if not (Y.shape[0] == 10 and P.shape[0] == 10 and Y.shape == P.shape):
raise ValueError("Y and P must have shape (10, k). Y: {}, P: {}".format(Y.shape, P.shape))
if not X.shape[0] == n_input:
raise ValueError("X must have shape ({}, k), has {}".format(n_input, X.shape))
if not H.shape[0] == n_hidden:
raise ValueError("H must have shape ({}, k)".format(n_hidden))
grad_W1 = np.zeros([n_hidden, n_input])
grad_W2 = np.zeros([10, n_hidden])
grad_b1 = np.zeros([n_hidden, 1])
grad_b2 = np.zeros([10, 1])
X, Y, H, P = X.T, Y.T, H.T, P.T
for x, y, s1, h, p in zip(X, Y, S1, H, P):
h = np.reshape(h, [1, n_hidden])
y = np.reshape(y, [10, 1])
p = np.reshape(p, [10, 1])
# Second layer
g = -(y-p).T
grad_b2 += g.T
grad_W2 += np.matmul(g.T, h)
# First layer
g = np.matmul(g, W2)
ind = np.zeros(h.shape[1])
for i, val in enumerate(s1):
if val > 0:
ind[i] = 1
diag = np.diag(ind)
g = np.matmul(g, diag)
grad_b1 += g.T
grad_W1 += np.matmul(g.T, np.reshape(x, [1, n_input]))
# Divide by batch size
grad_b1 /= X.shape[0]
grad_b2 /= X.shape[0]
grad_W1 /= X.shape[0]
grad_W2 /= X.shape[0]
# Add regularization term
grad_W1 += 2*lamb*W1
grad_W2 += 2*lamb*W2
return grad_W1, grad_W2, grad_b1, grad_b2
If X, Y, H, P are all 10 rows long (n=10), the computations take about 1 second. This is way too much compared to my friends who are doing the same task. But I can't see any obvious inefficiencies in my code. What can I do to speed the computations up?
EDIT: Input data is the CIFAR dataset. Load it like this:
def one_hot(Y):
# assume Y = [1, 4, 9, 0, ...]
result = [None]*len(Y)
for i, cls in enumerate(Y):
onehot = {
0: lambda: [1,0,0,0,0,0,0,0,0,0],
1: lambda: [0,1,0,0,0,0,0,0,0,0],
2: lambda: [0,0,1,0,0,0,0,0,0,0],
3: lambda: [0,0,0,1,0,0,0,0,0,0],
4: lambda: [0,0,0,0,1,0,0,0,0,0],
5: lambda: [0,0,0,0,0,1,0,0,0,0],
6: lambda: [0,0,0,0,0,0,1,0,0,0],
7: lambda: [0,0,0,0,0,0,0,1,0,0],
8: lambda: [0,0,0,0,0,0,0,0,1,0],
9: lambda: [0,0,0,0,0,0,0,0,0,1],
}[cls]()
result[i] = onehot
result = np.array(result).T
return result
def unpickle(file):
import pickle
with open(file, "rb") as fo:
d = pickle.load(fo, encoding="bytes")
return d
names = ["data_batch_1",
"data_batch_2",
"data_batch_3",
"data_batch_4",
"data_batch_5",
]
# All data sets
dataset_large = {"data": np.zeros([0, 3072]), "labels": np.array([])}
validation_large = {}
## All data batches
for name in names[0:4]:
raw = unpickle(os.path.join(path, name))
dataset_large["data"] = np.append(dataset_large["data"], raw[b"data"], axis = 0)
dataset_large["labels"] = np.append(dataset_large["labels"], raw[b"labels"], axis = 0)
raw = unpickle(os.path.join(path, names[4]))
dataset_large["data"] = np.append(dataset_large["data"], raw[b"data"][0: -1000], axis = 0)
dataset_large["labels"] = np.append(dataset_large["labels"], raw[b"labels"][0: -1000], axis = 0)
validation_large["data"] = raw[b"data"][-1000: ]
validation_large["labels"] = raw[b"labels"][-1000: ]
# Make one-hot
dataset_large["labels"] = one_hot(dataset_large["labels"]).T
validation_large["labels"] = one_hot(validation_large["labels"]).T
# Normalize
dataset_large["data"] = dataset_large["data"]/255
validation_large["data"] = validation_large["data"]/255
the dataset can be found at https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz . Then run like:
def evaluate_classifier(X, W1, W2, b1, b2):
if not X.shape[0] == n_input:
ValueError("Wrong shape of X: {}".format(X.shape))
if not len(X.shape) == 2:
ValueError("Wrong shape of X: {}".format(X.shape))
if not W1.shape == (n_hidden, n_input):
raise ValueError("Wrong shape of W1: {}".format(W1.shape))
if not b1.shape == (n_hidden, 1):
raise ValueError("Wrong shape of b1: {}".format(b1.shape))
if not W2.shape == (10, n_hidden):
raise ValueError("Wrong shape of W2: {}".format(W2.shape))
if not b2.shape == (10, 1):
raise ValueError("Wrong shape of b2: {}".format(b2.shape))
s1, h = layer_1(X, W1, b1)
p = layer_2(h, W2, b2)
return s1, h, p
W1 = np.random.normal(0, 0.01, [n_hidden, n_input])
W2 = np.random.normal(0, 0.01, [10, n_hidden])
b1 = np.random.normal(0, 0.1, [n_hidden, 1])
b2 = np.random.normal(0, 0.1, [10, 1])
X = dataset_large["data"][0:10]
Y = dataset_large["labels"][0:10]
S1, H, P = evaluate_classifier(X, W1, W2, b1, b2)
lamb = 0
compute_gradients(X, Y, S1, H, P, W1, W2, lamb)

How can I fit an equation which is not a function

Given data points in the xy plane, I would like to use scipy.optimize.leastsq to find fit parameters for an ellipse (which cannot be written as a function of x and y). I tried setting the entire equation equal to zero, and then fitting this function, but the fit is failing to converge with error output
"The relative error between two consecutive iterates is at most 0.000000."
The code is shown below, as well as the output. The fitter clearly does not find any reasonable parameters. My question is whether or not this is a problem with scipy.optimize.leastsq, or whether the "trick" of setting the function equal to zero and instead fitting that is not valid.
from scipy.optimize import leastsq, curve_fit
import numpy as np
import matplotlib.pyplot as plt
def function(x,y,theta,smaj,smin):
xp = np.cos(theta)*x - np.sin(theta)*y
yp = np.sin(theta)*x + np.cos(theta)*y
z = ((xp)**2)/smaj**2 + ((yp)**2)/smin**2
return z
def g(x,y,smaj,smin):
return x*x/smaj**2 + y*y/smin**2
def window(array,alt,arange):
arr = [array[i] for i,a in enumerate(alt) if a > arange[0] and a < arange[1]]
return np.asarray(arr)
def fitter(p0,x,y,func,errfunc,err):
# the fitter function
out = leastsq(errfunc,p0,args=(x,y,func,err),full_output=1)
pfinal = out[0]
covar = out[1]
mydict = out[2]
mesg = out[3]
ier = out[4]
resids = mydict['fvec']
chisq = np.sum(resids**2)
degs_frdm = len(x)-len(pfinal)
reduced_chisq = chisq/degs_frdm
ls = [pfinal,covar,mydict,mesg,ier,resids,chisq,degs_frdm,reduced_chisq]
print('fitter status: ', ier, '-- aka -- ', mesg)
i = 0
if covar is not None:
if (ier == 1 or ier == 2 or ier == 3 or ier == 4):
for u in pfinal:
print ('Param', i+1, ': ',u, ' +/- ', np.sqrt(covar[i,i]))
i = i + 1
print ('reduced chisq',reduced_chisq)
else:
print('fitter failed')
return ls
def func(x,y,p):
x = x-p[3]
y = y-p[4]
xp = np.cos(p[0])*(x) - np.sin(p[0])*(y)
yp = np.sin(p[0])*(x) + np.cos(p[0])*(y)
z = ((xp)**2)/p[1]**2 + ((yp)**2)/p[2]**2 - 1
return z
def errfunc(p,x,y,func,err):
return (y-func(x,y,p))/err
t = np.linspace(0,2*np.pi,100)
xx = 5*np.cos(t); yy = np.sin(t)
p0 = [0,5,1,0,0]
sigma = np.ones(len(xx))
fit = fitter(p0,xx,yy,func,errfunc,sigma)
params = fit[0]
covariance = fit[1]
residuals = fit[5]
t = np.linspace(0,2*np.pi,100)
xx = 5*np.cos(t); yy = np.sin(t)
plt.plot(xx,yy,'bx',ms = 4)
xx = np.linspace(-10,10, 1000)
yy = np.linspace(-5, 5, 1000)
newx = []
newy = []
for x in xx:
for y in yy:
if 0.99 < func(x,y,params) < 1.01:
#if g(x,y,5,1) == 1:
newx.append(x)
newy.append(y)
plt.plot(newx,newy,'kx',ms = 1)
plt.show()
The blue crosses are the actual data, and the black line is the fitters guess at the parameters.

Scipy Minimization TNC Working, But Not CG

I'm trying to complete week 4 the Machine Learning course on Coursera. The assingment uses the MINST data for multi-class classification.
The dimensions are X (5000,401), y (5000,1), theta (10,401), which start off as arrays. X was inserted with 1's on the first feature column.
My cost and gradient functions are below:
def sigmoid(z):
g = 1 / (1 + np.exp(-z))
return g
def lrCostFunction(theta, X, y, my_lambda):
m = float(len(X))
theta = np.matrix(theta)
X = np.matrix(X)
y = np.matrix(y)
#cost function:
term1 = np.multiply(-y,np.log(sigmoid(X*theta.T)))
term2 = np.multiply((1-y),np.log(1-sigmoid(X*theta.T)))
reg = np.power(theta[:,1:theta.shape[1]],2)
J = np.sum(term1-term2)/m + (my_lambda/(2.0*m) * np.sum(reg))
return J
def gradient (theta, X, y, my_lambda):
m = float(len(X))
theta = np.matrix(theta)
X = np.matrix(X)
y = np.matrix(y)
#gradient:
error = sigmoid(X * theta.T) - y
g = (X.T * error /(m)).T + ((my_lambda/m) * theta)
g[0,0] = np.sum(np.multiply(error, X[:,0])) / m
return g
Here is my One vs All classification function with the TNC optimization:
def oneVsAll(X, y, num_labels, my_lambda):
m = float(X.shape[0])
n = float(X.shape[1])-1
all_theta = np.zeros((num_labels,n+1))
for K in range(1, num_labels + 1):
theta = np.zeros(n+1)
y_logical = np.array([1 if j == K else 0 for j in y]).reshape(m,1)
opt_theta = opt.minimize(fun=lrCostFunction, x0=theta, \
args=(X,y_logical,my_lambda), \
method='TNC', jac=gradient).x
all_theta[K-1,:] = opt_theta
return all_theta
When I try to run CG however, it returns the error at line 8: "shapes (1,401) and (1,401) not aligned: 401 (dim 1) != 1 (dim 0)":
def oneVsAll(X, y, num_labels, my_lambda):
m = float(X.shape[0])
n = float(X.shape[1])-1
all_theta = np.zeros((num_labels,n+1))
for K in range(1, num_labels + 1):
theta = np.zeros(n+1)
y_logical = np.array([1 if j == K else 0 for j in y]).reshape(m,1)
opt_theta = opt.fmin_cg(f=lrCostFunction, x0=theta, \
fprime=gradient, \
args=(X,y_logical,my_lambda))
all_theta[K-1,:] = opt_theta
return all_theta
I saw elsewhere that CG only likes 1-d vectors from y. If I try to flatten y or reduce its dimension, however, everything else breaks. Is it generally a bad idea to use np.matrix as oppose to use np.dot with arrays? I like being able to easily transpose with matrixes.
Any help would be greatly appreciated.

Categories