I am trying to create a Neural Network to predict the behavior of variable "miu".
Since I only have 6 data points, I tried to use a spline to generate more points that follow the behavior of the system, and then use all of those points in the neural network.
I am using 2 inputs: time and cell concentration. The expected output is the miu value, which is given as the derivative dy/dx, where y is the cell concentration and x is the time.
I implemented the following code:
from gekko import brain
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
from numpy import diff
from scipy.interpolate import CubicSpline
xm = np.array([ 0.0 , 23.0 , 47.0 , 49.0 ,\
71.5 , 95.0 , 119.0 , 143.0 ])
def spline(cell):
    m = GEKKO()
    m.options.IMODE=2
    c = [m.FV(value=0) for i in range(4)]
    x = m.Param(value=xm)
    cell = np.array(cell)
    y = m.CV(value=cell)
    y.FSTATUS = 1
    # polynomial model
    m.Equation(y==c[0]+c[1]*x+c[2]*x**2+c[3]*x**3)
    c[0].STATUS=1
    m.solve(disp=False)
    c[1].STATUS=1
    m.solve(disp=False)
    c[2].STATUS=1
    c[3].STATUS=1
    m.solve(disp=False)
    pbr = [c[3].value[0],c[2].value[0],\
           c[1].value[0],c[0].value[0]]
    print(pbr)
    xp = np.linspace(0,144,100)
    plot1 = plt.figure(1)
    if cell[0] == cell_br2[0]:
        plt.plot(xm,cell_br2, 'ko', label ='BR2')
        plt.plot(xp,np.polyval(pbr,xp),'g:',linewidth=2)
    elif cell[0] == cell_br1[0]:
        plt.plot(xm,cell_br1, 'mo', label ='BR1')
        plt.plot(xp,np.polyval(pbr,xp),'r:',linewidth=2)
    plt.xlabel('time(hr)')
    plt.ylabel('cells')
    plt.legend()
    dx = diff(xp)
    dy1 = diff(np.polyval(pbr,xp))
    deriv1 = dy1/dx
    time = np.linspace(0,144,99)
    plot1 = plt.figure(2)
    if cell[0] == cell_br2[0]:
        plt.plot(time,deriv1,'b:',linewidth=2, label ='BR2')
    elif cell[0] == cell_br1[0]:
        plt.plot(time,deriv1,'m:',linewidth=2, label ='BR1')
    plt.xlabel('time(hr)')
    plt.ylabel('miu(1/h)')
    plt.legend()
    plt.show()
    return(deriv1)
m = GEKKO()
cell_br1 = (0.63*10**6 , 1.10*10**6, 2.06*10**6, 2.08*10**6,\
3.73*10**6, 3.89*10**6, 3.47*10**6,2.312*10**6)
cell_br2= (0.58*10**6 , 0.96*10**6, 2.07*10**6, 1.79*10**6,\
3.57*10**6, 3.34*10**6, 2.62*10**6, 1.75*10**6)
b = brain.Brain()
b.input_layer(2)
b.layer(linear=5)
b.layer(tanh=5)
b.layer(linear=5)
b.output_layer(1)
x_s = np.linspace(0,144,99)
xg = np.array([ 0.0 , 23.0 , 47.0 , 49.0 , 71.5 ,\
95.0 , 119.0 , 144.0 ])
cells_spline = CubicSpline(xm, cell_br1)
y_cells = cells_spline(x_s)
miu_1 = spline(cell_br1)
miu_2 = spline(cell_br2)
x = (x_s, y_cells)#, y_glucose) #Inputs (3)
y = (miu_1) #Output (2)
b.learn(x,y) # train
xp = np.linspace(0,144,99)
yp = b.think(x) # validate
yyp = np.array(yp)
miu = np.reshape(yyp, (99,))
plot1 = plt.figure(3)
plt.plot(xp,miu,'r-', label = 'Predicted ')
plt.plot(x_s,miu_1,'bo', label = 'Experimental points')
plt.xlabel('Time [hr]')
plt.ylabel('miu [1/h]')
plt.legend()
plt.show()
Although the solver finds a solution, the predicted miu is constant, which indicates that the training is not working; the output plot shows a flat prediction line.
Can someone please help? I can't find what is failing. Thanks
Here are a couple of issues with your current approach:
The training uses two inputs while the validation uses only one input.
The data is not scaled. It generally helps to scale the data to -1 to 1. I included a simple scaling factor, but there are better ways to do this that also zero-center the data (see the short sketch below).
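A zero-centering scaler could look something like this sketch (the helper functions are just for illustration and are not used in the script below):
import numpy as np

def scale_pm1(v):
    # map data to [-1, 1] and zero-center it
    v = np.asarray(v, dtype=float)
    center = (v.max() + v.min()) / 2.0
    half_range = (v.max() - v.min()) / 2.0
    return (v - center) / half_range, center, half_range

def unscale_pm1(vs, center, half_range):
    # invert the scaling to report predictions in original units
    return vs * half_range + center
The full script with the simple constant scale factors is below.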
from gekko import brain
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
from numpy import diff
from scipy.interpolate import CubicSpline
xm = np.array([ 0.0 , 23.0 , 47.0 , 49.0 ,\
71.5 , 95.0 , 119.0 , 143.0 ])
def spline(cell):
    m = GEKKO()
    m.options.IMODE=2
    c = [m.FV(value=0) for i in range(4)]
    x = m.Param(value=xm)
    cell = np.array(cell)
    y = m.CV(value=cell)
    y.FSTATUS = 1
    # polynomial model
    m.Equation(y==c[0]+c[1]*x+c[2]*x**2+c[3]*x**3)
    c[0].STATUS=1
    m.solve(disp=False)
    c[1].STATUS=1
    m.solve(disp=False)
    c[2].STATUS=1
    c[3].STATUS=1
    m.solve(disp=False)
    pbr = [c[3].value[0],c[2].value[0],\
           c[1].value[0],c[0].value[0]]
    print(pbr)
    xp = np.linspace(0,144,100)
    plot1 = plt.figure(1)
    if cell[0] == cell_br2[0]:
        plt.plot(xm,cell_br2, 'ko', label ='BR2')
        plt.plot(xp,np.polyval(pbr,xp),'g:',linewidth=2)
    elif cell[0] == cell_br1[0]:
        plt.plot(xm,cell_br1, 'mo', label ='BR1')
        plt.plot(xp,np.polyval(pbr,xp),'r:',linewidth=2)
    plt.xlabel('time(hr)')
    plt.ylabel('cells')
    plt.legend()
    dx = diff(xp)
    dy1 = diff(np.polyval(pbr,xp))
    deriv1 = dy1/dx
    time = np.linspace(0,144,99)
    plot1 = plt.figure(2)
    if cell[0] == cell_br2[0]:
        plt.plot(time,deriv1,'b:',linewidth=2, label ='BR2')
    elif cell[0] == cell_br1[0]:
        plt.plot(time,deriv1,'m:',linewidth=2, label ='BR1')
    plt.xlabel('time(hr)')
    plt.ylabel('miu(1/h)')
    plt.legend()
    #plt.show()
    return(deriv1)
cell_br1 = np.array([0.63*10**6 , 1.10*10**6, 2.06*10**6, 2.08*10**6,\
3.73*10**6, 3.89*10**6, 3.47*10**6,2.312*10**6])
cell_br2= np.array([0.58*10**6 , 0.96*10**6, 2.07*10**6, 1.79*10**6,\
3.57*10**6, 3.34*10**6, 2.62*10**6, 1.75*10**6])
b = brain.Brain(remote=True)
b.input_layer(1)
b.layer(linear=1)
b.layer(tanh=4)
b.layer(linear=1)
b.output_layer(1)
x_s = np.linspace(0,144,99)
xg = np.array([ 0.0 , 23.0 , 47.0 , 49.0 , 71.5 ,\
95.0 , 119.0 , 144.0 ])
cells_spline = CubicSpline(xm, cell_br1)
y_cells = cells_spline(x_s)
miu_1 = spline(cell_br1)
miu_2 = spline(cell_br2)
scale = [1.0e6,1.0e4]
x = (y_cells/scale[0]) #, y_glucose) #Inputs (3)
y = (miu_1/scale[1]) #Output (2)
b.learn(x,y) # train
yp = b.think(x) # validate
xp = np.linspace(0,144,99)
yyp = np.array(yp)
miu = np.reshape(yyp, (99,))
plot1 = plt.figure(3)
plt.plot(xp,miu*scale[1],'r-', label = 'Predicted ')
plt.plot(x_s,miu_1,'bo', label = 'Experimental points')
plt.xlabel('Time [hr]')
plt.ylabel('miu [1/h]')
plt.legend()
plt.show()
Recommendations:
Adjust the number of nodes and types of layers.
Use a package such as Keras or PyTorch for this type of problem; a minimal Keras sketch follows these recommendations. Here is a tutorial on Keras. Gekko is especially good at problems that need extra features such as constraints, non-standard activation functions, and hybrid machine learning where the model is a combination of physics-based and empirical elements.
Gekko uses gradient-based solvers that may get stuck at local minima.
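For illustration, a minimal Keras version of the same 2-input regression could look like the sketch below. The layer sizes, epochs, and stand-in arrays are assumptions, not a tested solution for this dataset:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# stand-ins for the scaled training data: (time, cells) -> miu
X_scaled = np.random.uniform(-1, 1, size=(99, 2))
y_scaled = np.random.uniform(-1, 1, size=(99,))

model = Sequential([
    Dense(5, activation='tanh', input_shape=(2,)),
    Dense(5, activation='tanh'),
    Dense(1, activation='linear')
])
model.compile(optimizer='adam', loss='mse')
model.fit(X_scaled, y_scaled, epochs=500, verbose=0)
y_pred = model.predict(X_scaled)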
Related
I am trying to use sysid to regress an ARX model periodically, then evaluate the predictive ability of that model by simulating with the future inputs and comparing the output with the experimental data. When I try to solve using m.solve() I get the following error: Exception: Data arrays must have the same length, and match time discretization in dynamic problems
The following is an MRE:
import numpy as np
from gekko import GEKKO

m = GEKKO()
X = [[ 0.9, 0.], [ 0.9, 0.],[ 0.9,0.],[ 0.9,0.],[ 0.9, 0.],[ 0.5,0.],[0.5,0.],[0.5,0.],[0.5,0.], [ 0.5, 0.]] # 2 values for inputs at each time step
Y = [20.3, 20.3, 20.2, 20.2, 20.1, 20.1, 20.1, 20., 19.9, 19.8,] # 1 output at each time step
t = np.linspace(0, 9*300, 10) # 10 points 5 minutes apart each
na = 1 # output coefficients
nb = 2 # input coefficients
res, p, K = m.sysid(t, X, Y, na, nb, pred='meas')
m.time = t - t[0]
y_, u_ = m.arx(p)
u_[0].value = X[0]
u_[1].value = X[1]
m.options.imode = 4
m.options.nodes = 2
# simulate
m.solve()
I don't want to do control; rather, I want to apply the experimental input values to future timesteps and see how the ARX model extrapolates.
Thanks for your help
The problem was with this section and how the data is loaded into value:
u_[0].value = X[:,0]
u_[1].value = X[:,1]
y_[0].value = Y
Try printing X[0] and X[1] to see that they are only the first and second elements of the original list. Converting to a numpy array helps with the slicing.
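A quick illustration of the difference, using the first few rows of the data:
import numpy as np

Xlist = [[0.9, 0.0], [0.9, 0.0], [0.5, 0.0]]
Xarr = np.array(Xlist)
print(Xlist[0])    # [0.9, 0.0] -> only the first row of the list
print(Xarr[:, 0])  # [0.9 0.9 0.5] -> the first input column for every time step
The full corrected script is below.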
import numpy as np
from gekko import GEKKO
m = GEKKO()
X = np.array([[ 0.9, 0.], [ 0.9, 0.],\
[ 0.9,0.],[ 0.9,0.],\
[ 0.9, 0.],[ 0.5,0.],\
[0.5,0.],[0.5,0.],\
[0.5,0.], [ 0.5, 0.]]) # 2 values for inputs at each time step
Y = np.array([20.3, 20.3, 20.2, 20.2,\
20.1, 20.1, 20.1, 20., \
19.9, 19.8]) # 1 output at each time step
t = np.linspace(0, 9*300, 10) # 10 points 5 minutes apart each
na = 1 # output coefficients
nb = 2 # input coefficients
res, p, K = m.sysid(t, X, Y, na, nb, pred='meas')
m.time = t - t[0]
y_, u_ = m.arx(p)
u_[0].value = X[:,0]
u_[1].value = X[:,1]
y_[0].value = Y
print(X[0])
print(X[1])
m.options.imode = 4
m.options.nodes = 2
# simulate
m.solve()
Here is another example with Pandas DataFrames:
from gekko import GEKKO
import pandas as pd
import matplotlib.pyplot as plt
# load data and parse into columns
url = 'http://apmonitor.com/do/uploads/Main/tclab_dyn_data2.txt'
data = pd.read_csv(url)
t = data['Time']
u = data[['H1','H2']]
y = data['T1']
# generate time-series model
m = GEKKO(remote=False) # remote=True for MacOS
# system identification
na = 2 # output coefficients
nb = 2 # input coefficients
yp,p,K = m.sysid(t,u,y,na,nb,diaglevel=1)
plt.figure()
plt.subplot(2,1,1)
plt.plot(t,u)
plt.legend([r'$u_0$',r'$u_1$'])
plt.ylabel('MVs')
plt.subplot(2,1,2)
plt.plot(t,y)
plt.plot(t,yp)
plt.legend([r'$y_0$',r'$z_0$'])
plt.ylabel('CVs')
plt.xlabel('Time')
plt.savefig('sysid.png')
plt.show()
We're also working on a package with a Seeq add-on for system identification that runs in Python and Jupyter notebooks.
I am having problems calculating the coordinates for the distance matrix D:
D :      R     D4     D6
R    [[  0.  234.  150.]
D4    [234.    0.  231.]
D6    [150.  231.    0.]]
My python code is as follows:
import numpy
from numpy import linalg as LA
import matplotlib.pyplot as plt
import math
def calc_points_and_plot():
    distance_matric = numpy.zeros((3, 3))
    weight_router_to_dongle_04 = 234
    weight_router_to_dongle_06 = 150
    weight_dongle_04_to_dongle_06 = 231
    distance_matric[0][1] = weight_router_to_dongle_04
    distance_matric[0][2] = weight_router_to_dongle_06
    distance_matric[1][0] = weight_router_to_dongle_04
    distance_matric[1][2] = weight_dongle_04_to_dongle_06
    distance_matric[2][0] = weight_router_to_dongle_06
    distance_matric[2][1] = weight_dongle_04_to_dongle_06
    print(f"dist matric \n {distance_matric}")

    m_matrix = numpy.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            m_matrix[i][j] = 0.5 * ((distance_matric[1][j]**2) + (distance_matric[i][1]**2) - (distance_matric[i][j]**2))
    print(f"m_matrix \n {m_matrix}")

    eigvals, eigvecs = LA.eig(m_matrix)
    print(f"eigen vals : \n {eigvals}")
    print(f"eigen vectors : \n {eigvecs}")

    results = []
    for i in range(3):
        if eigvals[i] != 0:
            results.append(math.sqrt(eigvals[i])*eigvecs[i])
    print(f"results \n {results}")

    coords = numpy.reshape(results, (2,3)).T
    print(coords)
    X_vals = [coords[i][0] for i in range(3)]
    Y_vals = [coords[i][1] for i in range(3)]
    plt.annotate(f"Router", xy=(X_vals[0], Y_vals[0]))
    plt.annotate(f"device 4", xy=(X_vals[1], Y_vals[1]))
    plt.annotate(f"device 6", xy=(X_vals[2], Y_vals[2]))
    plt.scatter(X_vals, Y_vals, s=230, c="black", marker="D")
    plt.scatter(X_vals, Y_vals, s=180, c="red", marker="D")
    plt.plot([X_vals[0], X_vals[1]], [Y_vals[0], Y_vals[1]], c="red", linewidth=1, linestyle='--')
    plt.plot([X_vals[0], X_vals[2]], [Y_vals[0], Y_vals[2]], c="red", linewidth=1, linestyle='--')
    plt.plot([X_vals[1], X_vals[2]], [Y_vals[1], Y_vals[2]], c="red", linewidth=1, linestyle='--')

    # Verify distances against the given distance matrix
    dist1 = math.sqrt((X_vals[0]-X_vals[1])**2 + (Y_vals[0]-Y_vals[1])**2)
    dist2 = math.sqrt((X_vals[0]-X_vals[2])**2 + (Y_vals[0]-Y_vals[2])**2)
    dist3 = math.sqrt((X_vals[1]-X_vals[2])**2 + (Y_vals[1]-Y_vals[2])**2)
    print(f"dist1 : {dist1}")
    print(f"dist2 : {dist2}")
    print(f"dist3 : {dist3}")
    plt.show()

if __name__ == "__main__":
    calc_points_and_plot()
The following pictures are the actual output and the expected output.
I followed this link: finding-the-coordinates-of-points-from-distance-matrix, but still didn't manage to get the correct (x, y) points.
Looking at the actual output, the distance from the router to device 4 comes out to about 440, even though the given distance matrix says it should be 234.
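For reference, here is a minimal classical MDS sketch of the approach from that link (my adaptation for this 3x3 matrix, not the code above); its pairwise distances do reproduce D:
import numpy as np

D = np.array([[  0., 234., 150.],
              [234.,   0., 231.],
              [150., 231.,   0.]])
n = D.shape[0]
J = np.eye(n) - np.ones((n, n)) / n      # centering matrix
B = -0.5 * J @ (D ** 2) @ J              # double-centered Gram matrix
eigvals, eigvecs = np.linalg.eigh(B)     # eigenvalues in ascending order
idx = np.argsort(eigvals)[::-1][:2]      # keep the two largest eigenvalues
coords = eigvecs[:, idx] * np.sqrt(eigvals[idx])
# check: pairwise distances of the recovered points
print(np.round(np.linalg.norm(coords[:, None] - coords[None, :], axis=-1), 1))
One thing to watch: numpy returns eigenvectors as columns, so the i-th eigenvector is eigvecs[:, i], not eigvecs[i].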
I wrote a piece of code to build a simple linear regression model in Python. However, I am having trouble getting the correct cost function and, most importantly, the correct theta parameters. The model is implemented from scratch, not with the scikit-learn module. I used Andrew Ng's notes from his ML Coursera course to create the model. The correct values of theta are [[-3.630291] [1.166362]].
Would be really grateful if someone could offer their expertise, and point out what I'm doing wrong.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#Load The Dataset
dataset = pd.read_csv("Population vs Profit.txt",names=["Population" ,
"Profit"])
print (dataset.head())
col = len(dataset.columns)
x = dataset.iloc[:,:col-1].values
y = dataset.iloc[:,col-1].values
#Visualizing The Dataset
plt.scatter(x, y, color="red", marker="x", label="Profit")
plt.title("Population vs Profit")
plt.xlabel("Population")
plt.ylabel("Profit")
plt.legend()
plt.show()
#Preprocessing Data
dataset.insert(0,"x0",1)
col = len(dataset.columns)
x = dataset.iloc[:,:col-1].values
b = np.zeros(col-1)
m = len(y)
costlist = []
alpha = 0.001
iteration = 10000
#Defining Functions
def hypothesis(x,b,y):
    h = x.dot(b.T) - y
    return h

def cost(x,b,y,m):
    j = np.sum(hypothesis(x,b,y)**2)
    j = j/(2*m)
    return j

print (cost(x,b,y,m))

def gradient_descent(x,b,y,m,alpha):
    for i in range (iteration):
        h = hypothesis(x,b,y)
        product = np.sum(h.dot(x))
        b = b - ((alpha/m)*product)
        costlist.append(cost(x,b,y,m))
    return b,cost(x,b,y,m)
b , mincost = gradient_descent(x,b,y,m,alpha)
print (b , mincost)
print (cost(x,b,y,m))
plt.plot(b,color="green")
plt.show()
The dataset I'm using is the following text.
6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
7.4764,4.3483
8.5781,12
6.4862,6.5987
5.0546,3.8166
5.7107,3.2522
14.164,15.505
5.734,3.1551
8.4084,7.2258
5.6407,0.71618
5.3794,3.5129
6.3654,5.3048
5.1301,0.56077
6.4296,3.6518
7.0708,5.3893
6.1891,3.1386
20.27,21.767
5.4901,4.263
6.3261,5.1875
5.5649,3.0825
18.945,22.638
12.828,13.501
10.957,7.0467
13.176,14.692
22.203,24.147
5.2524,-1.22
6.5894,5.9966
9.2482,12.134
5.8918,1.8495
8.2111,6.5426
7.9334,4.5623
8.0959,4.1164
5.6063,3.3928
12.836,10.117
6.3534,5.4974
5.4069,0.55657
6.8825,3.9115
11.708,5.3854
5.7737,2.4406
7.8247,6.7318
7.0931,1.0463
5.0702,5.1337
5.8014,1.844
11.7,8.0043
5.5416,1.0179
7.5402,6.7504
5.3077,1.8396
7.4239,4.2885
7.6031,4.9981
6.3328,1.4233
6.3589,-1.4211
6.2742,2.4756
5.6397,4.6042
9.3102,3.9624
9.4536,5.4141
8.8254,5.1694
5.1793,-0.74279
21.279,17.929
14.908,12.054
18.959,17.054
7.2182,4.8852
8.2951,5.7442
10.236,7.7754
5.4994,1.0173
20.341,20.992
10.136,6.6799
7.3345,4.0259
6.0062,1.2784
7.2259,3.3411
5.0269,-2.6807
6.5479,0.29678
7.5386,3.8845
5.0365,5.7014
10.274,6.7526
5.1077,2.0576
5.7292,0.47953
5.1884,0.20421
6.3557,0.67861
9.7687,7.5435
6.5159,5.3436
8.5172,4.2415
9.1802,6.7981
6.002,0.92695
5.5204,0.152
5.0594,2.8214
5.7077,1.8451
7.6366,4.2959
5.8707,7.2029
5.3054,1.9869
8.2934,0.14454
13.394,9.0551
5.4369,0.61705
One issue is with your "product". It is currently a number when it should be a vector. I was able to get the values [-3.24044334 1.12719788] by rewriting your for-loop as follows:
def gradient_descent(x,b,y,m,alpha):
    for i in range (iteration):
        h = hypothesis(x,b,y)
        #product = np.sum(h.dot(x))
        xvalue = x[:,1]
        product = h.dot(xvalue)
        hsum = np.sum(h)
        b = b - ((alpha/m) * np.array([hsum, product]))
        costlist.append(cost(x,b,y,m))
    return b,cost(x,b,y,m)
There is possibly another issue besides this, as it doesn't converge to your expected answer. You should also make sure you are using the same alpha.
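As a further sketch (my own variant; I have not verified it against the expected theta), the whole update can be written as a single matrix product, which is equivalent to the two-element update above and works for any number of features:
def gradient_descent_vectorized(x, b, y, m, alpha, iteration=10000):
    costlist = []
    for _ in range(iteration):
        h = x.dot(b) - y          # residuals for all samples
        grad = x.T.dot(h) / m     # gradient for every parameter at once
        b = b - alpha * grad
        costlist.append(np.sum(h ** 2) / (2 * m))
    return b, costlist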
There is an equation for an exponentially truncated power law in the article below:
Gonzalez, M. C., Hidalgo, C. A., & Barabasi, A. L. (2008). Understanding individual human mobility patterns. Nature, 453(7196), 779-782.
like this:
P(r_g) = (r_g + rg0)^(-beta) * exp(-r_g / K)
It is an exponentially truncated power law. There are three parameters to be estimated: rg0, beta, and K. We have obtained several users' radius of gyration (rg) and uploaded the data to GitHub: radius of gyrations.txt
The following code can be used to read the data and calculate P(rg):
import numpy as np
# read radius of gyration from file
rg = []
with open('/path-to-the-data/radius of gyrations.txt', 'r') as f:
    for i in f:
        rg.append(float(i.strip('\n')))

# calculate P(rg)
rg = sorted(rg, reverse=True)
rg = np.array(rg)
prg = np.arange(len(rg)) / float(len(rg)-1)
Or you can directly use the rg and prg data as follows:
rg = np.array([ 20.7863444 , 9.40547933, 8.70934714, 8.62690145,
7.16978087, 7.02575052, 6.45280959, 6.44755478,
5.16630287, 5.16092884, 5.15618737, 5.05610068,
4.87023561, 4.66753197, 4.41807645, 4.2635671 ,
3.54454372, 2.7087178 , 2.39016885, 1.9483156 ,
1.78393238, 1.75432688, 1.12789787, 1.02098332,
0.92653501, 0.32586582, 0.1514813 , 0.09722761,
0. , 0. ])
prg = np.array([ 0. , 0.03448276, 0.06896552, 0.10344828, 0.13793103,
0.17241379, 0.20689655, 0.24137931, 0.27586207, 0.31034483,
0.34482759, 0.37931034, 0.4137931 , 0.44827586, 0.48275862,
0.51724138, 0.55172414, 0.5862069 , 0.62068966, 0.65517241,
0.68965517, 0.72413793, 0.75862069, 0.79310345, 0.82758621,
0.86206897, 0.89655172, 0.93103448, 0.96551724, 1. ])
I can plot P(r_g) against r_g using the following Python script:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(rg, prg, 'bs', alpha = 0.3)
# roughly estimated params:
# rg0=1.8, beta=0.15, K=5
plt.plot(rg, (rg+1.8)**-.15*np.exp(-rg/5))
plt.yscale('log')
plt.xscale('log')
plt.xlabel('$r_g$', fontsize = 20)
plt.ylabel('$P(r_g)$', fontsize = 20)
plt.show()
How can I use these rg data to estimate the three parameters above? I hope to solve it using Python.
According to @Michael's suggestion, we can solve the problem using scipy.optimize.curve_fit:
def func(rg, rg0, beta, K):
    return (rg + rg0) ** (-beta) * np.exp(-rg / K)

from scipy import optimize
popt, pcov = optimize.curve_fit(func, rg, prg, p0=[1.8, 0.15, 5])
print(popt)
print(pcov)
The results are given below:
[ 1.04303608e+03 3.02058550e-03 4.85784945e+00]
[[ 1.38243336e+18 -6.14278286e+11 -1.14784675e+11]
[ -6.14278286e+11 2.72951900e+05 5.10040746e+04]
[ -1.14784675e+11 5.10040746e+04 9.53072925e+03]]
Then we can inspect the results by plotting the fitted curve.
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(rg, prg, 'bs', alpha = 0.3)
plt.plot(rg, (rg+popt[0])**-(popt[1])*np.exp(-rg/popt[2]) )
plt.yscale('log')
plt.xscale('log')
plt.xlabel('$r_g$', fontsize = 20)
plt.ylabel('$P(r_g)$', fontsize = 20)
plt.show()
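If the unconstrained fit wanders to implausible values such as the very large rg0 above, one option is to pass bounds to curve_fit; the ranges here are my own rough assumptions:
popt_b, pcov_b = optimize.curve_fit(func, rg, prg, p0=[1.8, 0.15, 5],
                                    bounds=([0, 0, 0.1], [50, 5, 100]))
print(popt_b)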
I'd like to implement a hierarchical Bayesian model with PyMC3. Before designing a complex model, I'm trying to get accustomed to PyMC3 by implementing Bayesian PCA and comparing the results with sklearn.decomposition.PCA.
In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# Generate data
nsamp_cl = 1000 #Number of samples per class and per site
cov = np.matrix([[1, 0.9, 0.05],
[0.9, 1, 0.05],
[0.05, 0.05, 1]])
nfeat = cov.shape[0] #Number of features
X0 = np.random.multivariate_normal(np.zeros(nfeat),cov,nsamp_cl)
X1 = np.random.multivariate_normal(np.zeros(nfeat),cov,nsamp_cl)
# Rotate class 1
theta = np.radians(90)
cos, sin = np.cos(theta), np.sin(theta)
R = np.matrix('{} {}; {} {}'.format(cos, -sin, sin, cos))
X1[:,0:2] = np.dot(X1[:,0:2],R.T)
X = np.concatenate([X0,X1])
Y = np.concatenate([np.zeros(X0.shape[0]),np.ones(X1.shape[0])])
n = X.shape[0]
d = X.shape[1]
In [2]:
plt.figure()
cols = ['b','r']
colors = [cols[y.astype(int)] for y in Y ]
plt.scatter(X[:,0],X[:,1],20,colors)
plt.title('Features 0 and 1')
plt.figure()
cols = ['b','r']
colors = [cols[y.astype(int)] for y in Y ]
plt.scatter(X[:,1],X[:,2],20,colors)
plt.title('Features 1 and 2')
Out [2]:
In [3]:
from pymc3 import Model,Normal,Gamma,math,variational
common_latent_model = Model()
# Building a latent model to extract site-robust principal components
with common_latent_model:
    n_latent = 3
    # ARD prior
    alphas = Gamma('alphas', alpha=1e-6, beta=1e-6, shape=n_latent)
    # Weight vector
    w = Normal('w',mu=0,tau=alphas,shape=(d,n_latent))
    # Latent space
    z = Normal('z',mu=0,tau=1,shape=(n,n_latent))
    # Multiply latent variables by W to go from latent to observation space
    t = math.dot(z,w.T)
    # Add bias
    mu = Normal('mu', mu=0, tau=0.01, shape=d)
    u = t + mu
    # Precision of the observation
    sigma = Gamma('sigma',alpha=1e-6, beta=1e-6,shape=1)
    # Likelihood (sampling distribution) of observations
    X_obs = Normal('X_obs', mu=u, tau=sigma, observed=X)

with common_latent_model:
    means, sds, elbos = variational.advi(n=10000,learning_rate=0.1, accurate_elbo=True)#100000)

plt.plot(elbos)
plt.ylabel('ELBO')
plt.xlabel('iteration')
In [4]:
for key in means:
    print("key: %s , value: %s" % (key, means[key]))
key: mu , value: [ 0.03288066 -0.05347487 0.00260641]
key: alphas_log_ , value: [ 6.94631195 6.85621834 6.84792233]
key: sigma_log_ , value: [-0.009662]
key: z , value: [[-0.01260083 -0.00460729 -0.01360558]
[-0.02817471 0.04281501 0.01643355]
[-0.05178572 -0.02470609 -0.05092171]
...,
[-0.05201711 0.00150599 -0.01167801]
[-0.01097088 -0.02666511 0.03660954]
[ 0.0609949 0.01156182 0.01814843]]
key: w , value: [[-0.06004834 0.00599346 -0.03071374]
[ 0.00668656 -0.01306511 0.00400904]
[-0.00141243 -0.00778869 0.03257137]]
In [5]:
PC_bayes = means['z']
plt.figure()
cols = ['b','r']
colors = [cols[y.astype(int)] for y in Y ]
plt.scatter(PC_bayes[:,0],PC_bayes[:,1],20,colors,alpha=.1)
Out [5]:
In [6]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(X)
PC = pca.transform(X)
In [7]:
plt.figure()
cols = ['b','r']
colors = [cols[y.astype(int)] for y in Y ]
plt.scatter(PC[:,0],PC[:,1],20,colors,alpha=.1)
Out [7]:
(You can find the iPython Notebook here:
https://github.com/peppeFarAway/pymc3/blob/master/BayesPCA.ipynb)
Why can't my Bayesian PCA implementation recover the principal components, while sklearn.decomposition.PCA can? Where am I making a mistake?
The main reference I used to implement the model is:
https://blogs.msdn.microsoft.com/infernet_team_blog/2011/09/30/bayesian-pca/