Cost function is not converging - python

I am new to machine learning, following the textbook "Python Machine Learning" and an online course on Coursera. I am trying to implement the single-perceptron algorithm on the standard iris dataset restricted to two classes ('setosa' and 'versicolor'), but the error function is not converging. Here is my code:
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

class perceptron(object):
    def __init__(self, a, iter):
        self.a = a
        self.iter = iter

    def fit(self, x, y):
        self.w_ = np.zeros(1 + x.shape[1])
        self.errors_ = []
        for i in range(self.iter):
            errors = 0
            for xi, target in zip(x, y):
                update = self.a * (target - self.predict(xi))
                self.w_[1:] = xi * update
                self.w_[0] = update
                errors += int(update != 0.0)
            self.errors_.append(errors)
            print(self.errors_)
        return self

    def net_input(self, x):
        return np.dot(x, self.w_[1:])

    def predict(self, x):
        return np.where(self.net_input(x) >= 0.0, 1, -1)

iris = datasets.load_iris()
x = iris.data[:100, :2]
y = iris.target
y = np.where(y == 0, -1, 1)

ppn = perceptron(a=0.01, iter=10)
ppn.fit(x, y)

plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='_')
plt.xlabel('epochs')
plt.ylabel('number of classification')
plt.show()
The number of misclassifications (errors) stays the same in every iteration.

These lines are wrong:
self.w_[1:]=xi*update
self.w_[0]=update
Change them to:
self.w_[1:] += update * xi
self.w_[0] += update
It also looks like your net_input implementation is wrong:
def net_input(self,x):
return np.dot(x,self.w_[1:])
Should be:
return np.dot(x, self.w_[1:]) + self.w_[0]
You can see the full implementation on my GitHub.
Let me know if that doesn't solve your problem.
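Putting both fixes together, the fit and net_input methods would look like this (a minimal sketch using the same names as your code):

    def fit(self, x, y):
        self.w_ = np.zeros(1 + x.shape[1])
        self.errors_ = []
        for _ in range(self.iter):
            errors = 0
            for xi, target in zip(x, y):
                update = self.a * (target - self.predict(xi))
                self.w_[1:] += update * xi   # accumulate the update instead of overwriting
                self.w_[0] += update         # bias term
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, x):
        # include the bias weight w_[0] in the activation
        return np.dot(x, self.w_[1:]) + self.w_[0]

With these changes the error count should drop to zero within a few epochs, since setosa and versicolor are linearly separable on these two features.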

Related

Are optimisation builtin functions of Matlab better than Python?

Hi all. I encountered a case where the minimisation results from Matlab are very close to the mathematical solution (i.e., the one obtained when solving the equations by hand), compared to the results obtained from Python's scipy minimize builtin function. I'm not sure where I'm going wrong or how to improve the results in Python. Any suggestion would be of great help.
The aim of this problem is to find the time period of a set of nonlinear differential equations without time-evolving them. For the test case I took the problem from "This place".
The nonlinear differential equations look like this (reconstructed from the dxD function below): dx/dt = -4*y + x*u and dy/dt = x + y*u, where u = 1 - x**2/4 - y**2.
Here I'm implementing a pseudospectral method for periodic systems. The implementation is similar to what is described here; the only change is that I'm taking uniformly spaced points, and the "D" matrix is formed pseudospectrally.
import numpy as np
from numpy import linalg as LA
import ast
from ast import literal_eval as make_tuple
import scipy
from scipy.optimize import minimize
from scipy.linalg import toeplitz
import matplotlib.pyplot as plt
%matplotlib tk

# This "Dmatrix" is used to get the derivative.
def Dmatrix(N):
    h = 2.0*np.pi/N
    col = np.zeros(N)
    col[1:] = 0.5*(-1.0)**np.arange(1,N)/np.sin(np.arange(1,N)*h/2.0)
    row = np.zeros(N); row[0] = col[0]; row[1:] = col[N-1:0:-1]
    D = toeplitz(col,row)
    return D
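# Quick sanity check (added for illustration, not in the original post): for odd N
# this is the standard Fourier spectral differentiation matrix, so it should
# differentiate smooth 2*pi-periodic functions essentially exactly,
# e.g. D @ sin(t) ~ cos(t) on the grid t_j = j*h.
Nchk = 31
tchk = np.arange(1, Nchk+1)*2.0*np.pi/Nchk
assert np.allclose(Dmatrix(Nchk) @ np.sin(tchk), np.cos(tchk))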
# Actual differential equations.
def dxD(x,y,t):
    u = (1 - (x**2)/4 - (y**2))
    dx = -4*y + x*u
    dy = x + y*u
    return np.array([dx,dy])

# Implementing the pseudospectral method
def dxFdxD(initial_guess,final_time):
    N = len(initial_guess)//2
    x_guess = initial_guess[:N]
    xl = np.array(x_guess[:])
    y_guess = initial_guess[N:2*N]
    yl = np.array(y_guess[:])
    tf = final_time
    tl = np.arange(1,N+1)*tf/N
    D = Dmatrix(N)
    XYTzipped = zip(xl,yl,tl)
    dX_D = np.array([dxD(xs,ys,ts) for xs,ys,ts in XYTzipped])  # dynamics at the nodes
    xlyl = np.array([xl,yl]).transpose()
    dX_F = np.array(D @ xlyl)*(2*np.pi/tf)                      # spectral derivative
    err = np.array(dX_D - dX_F).flatten()
    normError = LA.norm(err, 2)
    return normError
# Initial guess points
N = 201
final_time = 1.052*np.pi
tf = final_time
tgrid = np.arange(1,N+1)*tf/N
xguess = np.cos(tgrid)*2.0
yguess = -np.cos(tgrid)*0.5
tfl = np.pi*0.85
tfu = 1.5*np.pi
tfbounds = (tfl,tfu)
xstates = np.array([xguess,yguess]).flatten()
xstatesParameter = np.array([xstates,final_time], dtype=object)
xins = np.hstack(xstatesParameter).tolist()
# Objective function for optimising
def obj(x):
    N = (len(x)-1)//2
    tf = x[-1]
    xylist = x[:2*N]
    return dxFdxD(xylist,tf)
# Optimization using method='trust-constr'
l1 = [tfbounds]
str1 = str([bounds123 for bounds123 in l1])
str2 = str1.replace("[", "").replace("]", "")
bounds1 = make_tuple("(" + "(-5,5),(-5,5),"*N + str2 + ")")
bnds = bounds1

# constraint
def xyradius(x):
    nps = (len(x)-1)//2
    xs = x[:nps]
    ys = x[nps:2*nps]
    xsysZip = zip(xs,ys)
    truelist = [bool((xi**2)+(yi**2) > 0.25) for xi,yi in xsysZip]
    result = int(all(truelist))
    return result

xyradiusConstraintType = {'type':'ineq','fun':xyradius}
cons = [xyradiusConstraintType]

# Minimising "obj"
sol = minimize(obj,
               xins,
               method='trust-constr',
               bounds=bnds,
               tol=1e-10)
# Results
x_y_tf = sol.x
x_F = x_y_tf[:N]
y_F = x_y_tf[N:2*N]
tf_system = x_y_tf[-1]
print("time period tf=", tf_system, end="\n \n")
tgrid = np.arange(1,N+1)*tf/N

# Plots
fig = plt.figure(1)
ax = fig.add_subplot(111)
# specify label for the corresponding curve
# ax.set_xticks(tgrid, minor=False)
ax.set_xticks(tgrid, minor=True)
ax.xaxis.grid(True, which='major')
ax.xaxis.grid(True, which='minor')
ax.set_title('Collocation points')
plt.plot(tgrid, x_F, label='x result')
plt.plot(tgrid, y_F, label='y result')
ax.set_title('Optimized result x,y')
plt.legend()
plt.show()

# Parametric plot
ax = plt.figure(4).add_subplot()
ax.plot(x_F, y_F, label='State Space')
ax.legend()
plt.show()
Optimizing (minimizing) using method='SLSQP':
# Scipy for minimization using method='SLSQP'
l1 = [tfbounds]
str1 = str([bounds123 for bounds123 in l1])
str2 = str1.replace("[", "").replace("]", "")
bounds1 = make_tuple("(" + "(-5,5),(-5,5),"*N + str2 + ")")
bnds = bounds1

def xyradius(x):
    nps = (len(x)-1)//2
    xs = x[:nps]
    ys = x[nps:2*nps]
    xsysZip = zip(xs,ys)
    truelist = [bool((xi**2)+(yi**2) > 0.25) for xi,yi in xsysZip]
    result = int(all(truelist))
    return result

xyradiusConstraintType = {'type':'ineq','fun':xyradius}
cons = [xyradiusConstraintType]

sol = minimize(obj,
               xins,
               method='SLSQP',
               bounds=bnds,
               constraints=cons,
               tol=1e-10)
When I implemented the same approach in MATLAB, I got pi = 3.14 as the solution (the time period of the system), whereas in Python I'm getting 4.70 as the time period. Any suggestions are greatly appreciated. Thank you.

How to use external model data with Emukit python package

I am implementing this code (found here: https://emukit.readthedocs.io/en/latest/notebooks/Emukit-tutorial-custom-model.html)
import numpy as np
from emukit.experimental_design import ExperimentalDesignLoop
from emukit.core import ParameterSpace, ContinuousParameter
from emukit.core.loop import UserFunctionWrapper
from sklearn.gaussian_process import GaussianProcessRegressor

x_min = -30.0
x_max = 30.0

X = np.random.uniform(x_min, x_max, (10, 1))
Y = np.sin(X) + np.random.randn(10, 1) * 0.05

sklearn_gp = GaussianProcessRegressor()
sklearn_gp.fit(X, Y)

from emukit.core.interfaces import IModel

class SklearnGPModel(IModel):
    def __init__(self, sklearn_model):
        self.model = sklearn_model

    def predict(self, X):
        mean, std = self.model.predict(X, return_std=True)
        return mean[:, None], np.square(std)[:, None]

    def set_data(self, X: np.ndarray, Y: np.ndarray) -> None:
        self.model.fit(X, Y)

    def optimize(self, verbose: bool = False) -> None:
        # There is no separate optimization routine for sklearn models
        pass

    @property
    def X(self) -> np.ndarray:
        return self.model.X_train_

    @property
    def Y(self) -> np.ndarray:
        return self.model.y_train_

emukit_model = SklearnGPModel(sklearn_gp)
p = ContinuousParameter('c', x_min, x_max)
space = ParameterSpace([p])
loop = ExperimentalDesignLoop(space, emukit_model)
loop.run_loop(np.sin, 50)
I am trying to implement this code but with an external data set. To do this, I need to understand whether I can extract the 50 x-values propagated through the np.sin function when loop.run_loop(np.sin, 50) is executed. Then, having obtained these 50 inputs (x-values), I need to propagate them through an external software package, which saves the result as a .csv file.
The information that I would have, and that needs to be "put through" loop.run_loop(), is as follows:
So, I need to make the loop.run_loop() code work by loading external results data, but I do not know how to implement that.
If I understand your question correctly, passing data does not make sense in this context. The default acquisition function will select the next input (or experiment) based on your model. The model is updated at each iteration from the outcome of your experiment, and the next experiment depends on previous observations - it's not random.
Passing your samples independently of this loop would be significantly less informative.
In short, you need to define a function similar to np.sin that can be queried.
Hope this makes sense!
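For example, a queryable wrapper around external software might look like this (a rough sketch - the simulate executable, file names, and flags are all hypothetical placeholders for your actual tool):

import subprocess
import numpy as np

def external_simulator(X):
    # X arrives as an (n_points, 1) array from the loop.
    np.savetxt("inputs.csv", X, delimiter=",")
    # Hypothetical external tool; replace with a call to your real software.
    subprocess.run(["simulate", "--in", "inputs.csv", "--out", "results.csv"],
                   check=True)
    # run_loop expects a 2-D array of outcomes, one row per input point.
    return np.loadtxt("results.csv", delimiter=",").reshape(-1, 1)

# Then, in place of np.sin:
# loop.run_loop(external_simulator, 50)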

Python Information gain implementation

I am currently using scikit-learn for text classification on the 20ng dataset. I want to calculate the information gain for a vectorized dataset. It has been suggested to me that this can be accomplished using mutual_info_classif from sklearn. However, this method is really slow, so I was trying to implement information gain myself, based on this post.
I came up with the following solution:
from scipy.stats import entropy
import numpy as np

def information_gain(X, y):

    def _entropy(labels):
        counts = np.bincount(labels)
        return entropy(counts, base=None)

    def _ig(x, y):
        # indices where x is set/not set
        x_set = np.nonzero(x)[1]
        x_not_set = np.delete(np.arange(x.shape[1]), x_set)
        h_x_set = _entropy(y[x_set])
        h_x_not_set = _entropy(y[x_not_set])
        return entropy_full - (((len(x_set) / f_size) * h_x_set)
                               + ((len(x_not_set) / f_size) * h_x_not_set))

    entropy_full = _entropy(y)
    f_size = float(X.shape[0])
    scores = np.array([_ig(x, y) for x in X.T])
    return scores
Using a very small dataset, most scores from sklearn and my implementation are equal. However, sklearn seems to take frequencies into account, which my algorithm clearly doesn't. For example:
from time import time
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import mutual_info_classif

categories = ['talk.religion.misc', 'comp.graphics', 'sci.space']
newsgroups_train = fetch_20newsgroups(subset='train',
                                      categories=categories)
X, y = newsgroups_train.data, newsgroups_train.target

cv = CountVectorizer(max_df=0.95, min_df=2,
                     max_features=100,
                     stop_words='english')
X_vec = cv.fit_transform(X)

t0 = time()
res_sk = mutual_info_classif(X_vec, y, discrete_features=True)
print("Time passed for sklearn method: %3f" % (time()-t0))

t0 = time()
res_ig = information_gain(X_vec, y)
print("Time passed for ig: %3f" % (time()-t0))

for name, res_mi, res_ig in zip(cv.get_feature_names(), res_sk, res_ig):
    print("%s: mi=%f, ig=%f" % (name, res_mi, res_ig))
sample output:
center: mi=0.011824, ig=0.003548
christian: mi=0.128629, ig=0.127122
color: mi=0.028413, ig=0.026397
com: mi=0.041184, ig=0.030458
computer: mi=0.020590, ig=0.012327
cs: mi=0.007291, ig=0.001574
data: mi=0.020734, ig=0.008986
did: mi=0.035613, ig=0.024604
different: mi=0.011432, ig=0.005492
distribution: mi=0.007175, ig=0.004675
does: mi=0.019564, ig=0.006162
don: mi=0.024000, ig=0.017605
earth: mi=0.039409, ig=0.032981
edu: mi=0.023659, ig=0.008442
file: mi=0.048056, ig=0.045746
files: mi=0.041367, ig=0.037860
ftp: mi=0.031302, ig=0.026949
gif: mi=0.028128, ig=0.023744
god: mi=0.122525, ig=0.113637
good: mi=0.016181, ig=0.008511
gov: mi=0.053547, ig=0.048207
So I was wondering whether my implementation is wrong, or whether it is correct but scikit-learn uses a different variation of the mutual information algorithm.
A little late with my answer, but you should look at Orange's implementation. Within their app it is used as a behind-the-scenes processor to help inform the dynamic model parameter building process.
The implementation itself looks fairly straightforward and could most likely be ported out. The entropy calculation comes first.
The section starts at https://github.com/biolab/orange3/blob/master/Orange/preprocess/score.py#L233
def _entropy(dist):
    """Entropy of class-distribution matrix"""
    p = dist / np.sum(dist, axis=0)
    pc = np.clip(p, 1e-15, 1)
    return np.sum(np.sum(- p * np.log2(pc), axis=0) * np.sum(dist, axis=0) / np.sum(dist))
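To see what this computes, here is a small usage sketch with a made-up 2x3 contingency matrix (rows are classes, columns are feature values); the first call gives the class entropy, the second the class entropy conditioned on the feature:

import numpy as np

cont = np.array([[8.0, 2.0, 0.0],
                 [2.0, 8.0, 5.0]])

print(_entropy(np.sum(cont, axis=1)))  # H(class), from the class marginals
print(_entropy(cont))                  # H(class | feature), column-weighted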
Then the second portion.
https://github.com/biolab/orange3/blob/master/Orange/preprocess/score.py#L305
class GainRatio(ClassificationScorer):
    """
    Information gain ratio is the ratio between information gain and
    the entropy of the feature's value distribution. The score was
    introduced in [Quinlan1986]_ to alleviate overestimation for
    multi-valued features. See `Wikipedia entry on gain ratio
    <http://en.wikipedia.org/wiki/Information_gain_ratio>`_.

    .. [Quinlan1986] J R Quinlan: Induction of Decision Trees, Machine Learning, 1986.
    """
    def from_contingency(self, cont, nan_adjustment):
        h_class = _entropy(np.sum(cont, axis=1))
        h_residual = _entropy(np.compress(np.sum(cont, axis=0), cont, axis=1))
        h_attribute = _entropy(np.sum(cont, axis=0))
        if h_attribute == 0:
            h_attribute = 1
        return nan_adjustment * (h_class - h_residual) / h_attribute
The actual scoring process happens at https://github.com/biolab/orange3/blob/master/Orange/preprocess/score.py#L218

How to configure lasso regression to not penalize certain variables?

I'm trying to use lasso regression in Python.
I'm currently using the Lasso implementation in the scikit-learn library.
I want my model not to penalize certain variables while training (penalizing only the rest of the variables).
Below is my current code for training:
rg_mdt = linear_model.LassoCV(alphas=np.array(10**np.linspace(0, -4, 100)),
                              fit_intercept=True, normalize=True, cv=10)
rg_mdt.fit(df_mdt_rgmt.loc[df_mdt_rgmt.CLUSTER_ID == k].drop(['RESPONSE', 'CLUSTER_ID'], axis=1),
           df_mdt_rgmt.loc[df_mdt_rgmt.CLUSTER_ID == k, 'RESPONSE'])
df_mdt_rgmt is the data mart, and I'm trying to keep the coefficients for certain columns non-zero.
glmnet in R provides a 'penalty factor' parameter that lets me do this, but how can I do that in Python's scikit-learn?
Below is the code I have in R:
get.Lassomodel <- function(TB.EXP, TB.RSP){
  VT.PEN <- rep(1, ncol(TB.EXP))
  VT.PEN[which(colnames(TB.EXP) == "DC_RATE")] <- 0
  VT.PEN[which(colnames(TB.EXP) == "FR_PRICE_PW_REP")] <- 0
  VT.GRID <- 10^seq(0, -4, length=100)
  REG.MOD <- cv.glmnet(as.matrix(TB.EXP), as.matrix(TB.RSP), alpha=1,
                       lambda=VT.GRID, penalty.factor=VT.PEN, nfolds=10, intercept=TRUE)
  return(REG.MOD)
}
I'm afraid you can't. Of course it's not a theoretical issue, just a design decision.
My reasoning is based on the available API, and while there are sometimes undocumented functions, this time I don't think there is what you need, because the user guide already poses this problem in the one-penalty-for-all form alpha * ||w||_1.
Depending on your setting, you might modify sklearn's code (I'm a bit wary of touching the coordinate-descent internals) or even implement a customized objective using scipy.optimize (although the latter might be a bit slower).
Here is an example showing the scipy.optimize approach. I simplified the problem by removing intercepts.
""" data """
import numpy as np
from sklearn import datasets
diabetes = datasets.load_diabetes()
A = diabetes.data[:150]
y = diabetes.target[:150]
alpha=0.1
weights=np.ones(A.shape[1])
""" sklearn """
from sklearn import linear_model
clf = linear_model.Lasso(alpha=alpha, fit_intercept=False)
clf.fit(A, y)
""" scipy """
from scipy.optimize import minimize
def lasso(x): # following sklearn's definition from user-guide!
return (1. / (2*A.shape[0])) * np.square(np.linalg.norm(A.dot(x) - y, 2)) + alpha * np.linalg.norm(weights*x, 1)
""" Test with weights = 1 """
x0 = np.zeros(A.shape[1])
res = minimize(lasso, x0, method='L-BFGS-B', options={'disp': False})
print('Equal weights')
print(lasso(clf.coef_), clf.coef_[:5])
print(lasso(res.x), res.x[:5])
""" Test scipy-based with special weights """
weights[[0, 3, 5]] = 0.0
res = minimize(lasso, x0, method='L-BFGS-B', options={'disp': False})
print('Specific weights')
print(lasso(res.x), res.x[:5])
Output:
Equal weights
12467.4614224 [-524.03922009 -75.41111354 820.0330707 40.08184085 -307.86020107]
12467.6514697 [-526.7102518 -67.42487561 825.70158417 40.04699607 -271.02909258]
Specific weights
12362.6078842 [ -6.12843589e+02 -1.51628334e+01 8.47561732e+02 9.54387812e+01
-1.02957112e-05]
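To mirror the R penalty.factor logic in this setup, you would zero out the weight entries for the named columns before minimising. A short sketch, where the column-name list is hypothetical (only DC_RATE and FR_PRICE_PW_REP come from the question):

import numpy as np

# Hypothetical feature-name list; only the first two names come from the question.
feature_names = ["DC_RATE", "FR_PRICE_PW_REP", "X3", "X4", "X5"]

weights = np.ones(len(feature_names))
for name in ("DC_RATE", "FR_PRICE_PW_REP"):
    weights[feature_names.index(name)] = 0.0  # unpenalized, like penalty.factor = 0

# weights then enters the lasso() objective above, so these two
# coefficients are excluded from the L1 penalty.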

Multi-objective optimisation using PyGMO

I am using the PyGMO package for Python for multi-objective optimisation. I am unable to set the dimension of the fitness function in the constructor, and the documentation is not very descriptive either. I am wondering if anyone here has had experience with PyGMO in the past: this could be fairly simple.
I have tried to construct a minimal example below:
from PyGMO.problem import base
from PyGMO import algorithm, population
import numpy as np
import matplotlib.pyplot as plt

class my_problem(base):
    def __init__(self, fdim=2):
        NUM_PARAMS = 4
        super(my_problem, self).__init__(NUM_PARAMS)
        self.set_bounds(0.01, 100)

    def _objfun_impl(self, K):
        E1 = K[0] + K[2]
        E2 = K[1] + K[3]
        return (E1, E2, )

if __name__ == '__main__':
    prob = my_problem()  # Create the problem
    print(prob)
    algo = algorithm.sms_emoa(gen=100)
    pop = population(prob, 50)
    pop = algo.evolve(pop)

    F = np.array([ind.cur_f for ind in pop]).T
    plt.scatter(F[0], F[1])
    plt.xlabel("$E_1$")
    plt.ylabel("$E_2$")
    plt.show()
fdim=2 above is a failed attempt to set the fitness dimension. The code fails with the following error:
ValueError: ..\..\src\problem\base.cpp,584: fitness dimension was changed inside objfun_impl().
I'd be grateful if someone can help figure this out. Thanks!
Are you looking at the correct documentation?
There is no fdim (which in any case does nothing in your example, since it is only a local variable and is never used). But there is n_obj:
n_obj: number of objectives. Defaults to 1.
So, I think you want something like this (corrected thanks to @Distopia):
#(...)
    def __init__(self, fdim=2):
        NUM_PARAMS = 4
        super(my_problem, self).__init__(NUM_PARAMS, 0, fdim)
        self.set_bounds(0.01, 100)
#(...)
I modified their example and this seemed to work for me.
#(...)
    def __init__(self, fdim=2):
        NUM_PARAMS = 4
        # We call the base constructor as a 'dim'-dimensional problem, with 0 integer parts and 2 objectives.
        super(my_problem, self).__init__(NUM_PARAMS, 0, fdim)
        self.set_bounds(0.01, 100)
#(...)
