I have a curve parameterized by time that intersects a shape (in this case just a rectangle). Following this elegant suggestion, I used shapely to determine where the objects intersect. From there on, however, I struggle to find a good way to determine when that happens. Currently I approximate the time awkwardly: I find the point of the curve that is closest (in space) to the intersection, and then use its time stamp.
But I believe there should be a better solution, e.g. by solving the polynomial equation, perhaps using the roots method of a numpy polynomial. I'm just not sure how to do this, because I guess you would need to introduce tolerances somehow, as the curve will likely never assume exactly the intersection coordinates that shapely determines.
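For concreteness, here is a rough sketch of that root idea (my assumption: the curve enters through the rectangle's edge y = 1300, which has to be checked against the other coordinate). Since x(t) and y(t) are quadratics, the crossing time is an exact polynomial root, so no spatial tolerance is needed:
import numpy as np
# solve y(t) = 1300 exactly, then keep roots whose x(t) lies on the edge
coeffs_x = np.array([-2.65053088e-05, -5.70681576e-02, 7.92564148e+02])
coeffs_y = np.array([2.76890591e-05, -2.69415587e-01, 6.88557419e+02])
shifted = coeffs_y.copy()
shifted[-1] -= 1300  # y(t) - 1300 = 0
for root in np.roots(shifted):
    if not np.isreal(root):
        continue
    t = float(np.real(root))
    x = np.polyval(coeffs_x, t)
    if 700 <= x <= 1000 and -2400 <= t <= 3600:
        print(t)  # ~ -1898.92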
Here is my code:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, Ellipse
from matplotlib.collections import LineCollection
from shapely.geometry import LineString, Polygon
# the parameterized curve
coeffs = np.array([
[-2.65053088e-05, 2.76890591e-05],
[-5.70681576e-02, -2.69415587e-01],
[7.92564148e+02, 6.88557419e+02],
])
t_fit = np.linspace(-2400, 3600, 1000)
x_fit = np.polyval(coeffs[:, 0], t_fit)
y_fit = np.polyval(coeffs[:, 1], t_fit)
curve = LineString(np.column_stack((x_fit, y_fit)))
# the shape it intersects
area = {'x': [700, 1000], 'y': [1300, 1400]}
area_shape = Polygon([
(area['x'][0], area['y'][0]),
(area['x'][1], area['y'][0]),
(area['x'][1], area['y'][1]),
(area['x'][0], area['y'][1]),
])
# attempt at finding the time of intersection
intersection = curve.intersection(area_shape).coords[-1]
distances = np.hypot(x_fit-intersection[0], y_fit-intersection[1])
idx = np.where(distances == min(distances))
fit_intersection = x_fit[idx][0], y_fit[idx][0]
t_intersection = t_fit[idx]
print(t_intersection)
# code for visualization
fig, ax = plt.subplots(figsize=(5, 5))
ax.margins(0.4, 0.2)
ax.invert_yaxis()
area_artist = Rectangle(
(area['x'][0], area['y'][0]),
width=area['x'][1] - area['x'][0],
height=area['y'][1] - area['y'][0],
edgecolor='gray', facecolor='none'
)
ax.add_artist(area_artist)
points = np.array([x_fit, y_fit]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
z = np.linspace(0, 1, points.shape[0])
norm = plt.Normalize(z.min(), z.max())
lc = LineCollection(
segments, cmap='autumn', norm=norm, alpha=1,
linewidths=2, picker=8, capstyle='round',
joinstyle='round'
)
lc.set_array(z)
ax.add_collection(lc)
ax.autoscale_view()
ax.relim()
trans = (ax.transData + ax.transAxes.inverted()).transform
intersection_point = Ellipse(
xy=trans(fit_intersection), width=0.02, height=0.02, fc='none',
ec='black', transform=ax.transAxes, zorder=3,
)
ax.add_artist(intersection_point)
plt.show()
And just for the visuals, here is what the problem looks like in a plot:
The best approach is to use interpolation functions to compute (x(t), y(t)), together with a function that computes d(t), the distance to the intersection. Then we use scipy.optimize.minimize on d(t) to find the t value at which d(t) is minimal. Interpolation ensures good accuracy.
So, I added a few modifications to your code:
- definitions of the interpolation functions and the distance calculation
- a test whether there is indeed an intersection, otherwise it doesn't make sense
- computation of the intersection time by minimization
The code (UPDATED):
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, Ellipse
from matplotlib.collections import LineCollection
from shapely.geometry import LineString, Polygon
from scipy.optimize import minimize
# Interpolate (x,y) at time t:
def interp_xy(t, tp, fpx, fpy):
    # tp: time grid points, fpx, fpy: the corresponding x,y values
    x = np.interp(t, tp, fpx)
    y = np.interp(t, tp, fpy)
    return x, y

# Compute distance to intersection:
def dist_to_intersect(t, tp, fpx, fpy, intersection):
    x, y = interp_xy(t, tp, fpx, fpy)
    d = np.hypot(x - intersection[0], y - intersection[1])
    return d
# the parameterized curve
t_fit = np.linspace(-2400, 3600, 1000)
#t_fit = np.linspace(-4200, 0, 1000)
coeffs = np.array([[-2.65053088e-05, 2.76890591e-05],[-5.70681576e-02, -2.69415587e-01],[7.92564148e+02, 6.88557419e+02],])
#t_fit = np.linspace(-2400, 3600, 1000)
#coeffs = np.array([[4.90972365e-05, -2.03897149e-04],[2.19222264e-01, -1.63335372e+00],[9.33624672e+02, 1.07067102e+03], ])
#t_fit = np.linspace(-2400, 3600, 1000)
#coeffs = np.array([[-2.63100091e-05, -7.16542227e-05],[-5.60829940e-04, -3.19183803e-01],[7.01544289e+02, 1.24732452e+03], ])
#t_fit = np.linspace(-2400, 3600, 1000)
#coeffs = np.array([[-2.63574223e-05, -9.15525038e-05],[-8.91039302e-02, -4.13843734e-01],[6.35650643e+02, 9.40010900e+02], ])
x_fit = np.polyval(coeffs[:, 0], t_fit)
y_fit = np.polyval(coeffs[:, 1], t_fit)
curve = LineString(np.column_stack((x_fit, y_fit)))
# the shape it intersects
area = {'x': [700, 1000], 'y': [1300, 1400]}
area_shape = Polygon([
(area['x'][0], area['y'][0]),
(area['x'][1], area['y'][0]),
(area['x'][1], area['y'][1]),
(area['x'][0], area['y'][1]),
])
# attempt at finding the time of intersection
curve_intersection = curve.intersection(area_shape)
# We check if the intersection is empty or not:
if not curve_intersection.is_empty:
    # We can get the coords because the intersection is not empty
    intersection = curve_intersection.coords[-1]
    distances = np.hypot(x_fit - intersection[0], y_fit - intersection[1])
    print("Looking for minimal distance to intersection: ")
    print('-------------------------------------------------------------------------')
    # Call to minimize. We pass:
    # - the function to be minimized (dist_to_intersect)
    # - a starting value t0 (the time grid point closest to the intersection)
    # - extra arguments, the method, and the tolerance tol, which controls
    #   when the Nelder-Mead iteration is considered converged
    # - options: here --> verbose
    index = np.argmin(distances)
    t0 = t_fit[index]
    res = minimize(dist_to_intersect, t0,
                   args=(t_fit, x_fit, y_fit, intersection),
                   method='Nelder-Mead', tol=1e-6, options={'disp': True})
    print('-------------------------------------------------------------------------')
    print("Result of the optimization:")
    print(res)
    print('-------------------------------------------------------------------------')
    print("Intersection at time t = ", res.x[0])
    fit_intersection = interp_xy(res.x[0], t_fit, x_fit, y_fit)
    print("Intersection point : ", fit_intersection)
else:
    print("No intersection.")
# code for visualization
fig, ax = plt.subplots(figsize=(5, 5))
ax.margins(0.4, 0.2)
ax.invert_yaxis()
area_artist = Rectangle(
(area['x'][0], area['y'][0]),
width=area['x'][1] - area['x'][0],
height=area['y'][1] - area['y'][0],
edgecolor='gray', facecolor='none'
)
ax.add_artist(area_artist)
#plt.plot(x_fit,y_fit)
points = np.array([x_fit, y_fit]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
z = np.linspace(0, 1, points.shape[0])
norm = plt.Normalize(z.min(), z.max())
lc = LineCollection(
segments, cmap='autumn', norm=norm, alpha=1,
linewidths=2, picker=8, capstyle='round',
joinstyle='round'
)
lc.set_array(z)
ax.add_collection(lc)
# Again, we check that the intersection exists because we don't want to
# draw a non-existing point (it would generate an error)
if not curve_intersection.is_empty:
    plt.plot(fit_intersection[0], fit_intersection[1], 'o')
plt.show()
OUTPUT:
Looking for minimal distance to intersection:
-------------------------------------------------------------------------
Optimization terminated successfully.
Current function value: 0.000000
Iterations: 31
Function evaluations: 62
-------------------------------------------------------------------------
Result of the optimization:
 final_simplex: (array([[-1898.91943932],
       [-1898.91944021]]), array([8.44804735e-09, 3.28684898e-07]))
           fun: 8.448047349426054e-09
       message: 'Optimization terminated successfully.'
          nfev: 62
           nit: 31
        status: 0
       success: True
             x: array([-1898.91943932])
-------------------------------------------------------------------------
Intersection at time t = -1898.919439315796
Intersection point : (805.3563860471179, 1299.9999999916085)
Whereas your code gives a much less precise result:
t=-1901.5015015
intersection point: (805.2438793482748,1300.9671136070717)
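As a side note (not part of the original fix), since d(t) is one-dimensional, a bounded scalar minimization works as well, reusing the same helper functions and variables:
from scipy.optimize import minimize_scalar
res = minimize_scalar(dist_to_intersect,
                      bounds=(t_fit[0], t_fit[-1]),
                      args=(t_fit, x_fit, y_fit, intersection),
                      method='bounded')
print(res.x)  # should again land close to -1898.92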
I am trying to create a neural network to predict the behavior of the variable "miu".
Since I only have 6 data points, I used a spline to generate more points that follow the behavior of the system, so that I can then feed all those points to the neural network.
I am trying to use 2 inputs, which are time and cell concentration, and the expected output would be the miu value, which is given by the derivative dy/dx, where y is the cell concentration and x is the time.
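As a side note, since miu is defined as the derivative of the spline, CubicSpline can evaluate that derivative directly, which avoids the finite-difference step; a minimal sketch with the same data points:
import numpy as np
from scipy.interpolate import CubicSpline
xm = np.array([0.0, 23.0, 47.0, 49.0, 71.5, 95.0, 119.0, 143.0])
cell_br1 = np.array([0.63e6, 1.10e6, 2.06e6, 2.08e6,
                     3.73e6, 3.89e6, 3.47e6, 2.312e6])
x_s = np.linspace(0, 144, 99)
cs = CubicSpline(xm, cell_br1)
miu_direct = cs(x_s, 1)  # first derivative dy/dx evaluated on x_s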
I implemented the following code:
from gekko import brain
from gekko import GEKKO  # needed for the GEKKO() calls below
import numpy as np
import matplotlib.pyplot as plt
from numpy import diff
from scipy.interpolate import CubicSpline
xm = np.array([ 0.0 , 23.0 , 47.0 , 49.0 ,\
71.5 , 95.0 , 119.0 , 143.0 ])
def spline(cell):
    m = GEKKO()
    m.options.IMODE = 2
    c = [m.FV(value=0) for i in range(4)]
    x = m.Param(value=xm)
    cell = np.array(cell)
    y = m.CV(value=cell)
    y.FSTATUS = 1
    # polynomial model
    m.Equation(y == c[0] + c[1]*x + c[2]*x**2 + c[3]*x**3)
    c[0].STATUS = 1
    m.solve(disp=False)
    c[1].STATUS = 1
    m.solve(disp=False)
    c[2].STATUS = 1
    c[3].STATUS = 1
    m.solve(disp=False)
    pbr = [c[3].value[0], c[2].value[0],
           c[1].value[0], c[0].value[0]]
    print(pbr)
    xp = np.linspace(0, 144, 100)
    plot1 = plt.figure(1)
    if cell[0] == cell_br2[0]:
        plt.plot(xm, cell_br2, 'ko', label='BR2')
        plt.plot(xp, np.polyval(pbr, xp), 'g:', linewidth=2)
    elif cell[0] == cell_br1[0]:
        plt.plot(xm, cell_br1, 'mo', label='BR1')
        plt.plot(xp, np.polyval(pbr, xp), 'r:', linewidth=2)
    plt.xlabel('time(hr)')
    plt.ylabel('cells')
    plt.legend()
    dx = diff(xp)
    dy1 = diff(np.polyval(pbr, xp))
    deriv1 = dy1/dx
    time = np.linspace(0, 144, 99)
    plot1 = plt.figure(2)
    if cell[0] == cell_br2[0]:
        plt.plot(time, deriv1, 'b:', linewidth=2, label='BR2')
    elif cell[0] == cell_br1[0]:
        plt.plot(time, deriv1, 'm:', linewidth=2, label='BR1')
    plt.xlabel('time(hr)')
    plt.ylabel('miu(1/h)')
    plt.legend()
    plt.show()
    return deriv1
m = GEKKO()
cell_br1 = (0.63*10**6 , 1.10*10**6, 2.06*10**6, 2.08*10**6,\
3.73*10**6, 3.89*10**6, 3.47*10**6,2.312*10**6)
cell_br2= (0.58*10**6 , 0.96*10**6, 2.07*10**6, 1.79*10**6,\
3.57*10**6, 3.34*10**6, 2.62*10**6, 1.75*10**6)
b = brain.Brain()
b.input_layer(2)
b.layer(linear=5)
b.layer(tanh=5)
b.layer(linear=5)
b.output_layer(1)
x_s = np.linspace(0,144,99)
xg = np.array([ 0.0 , 23.0 , 47.0 , 49.0 , 71.5 ,\
95.0 , 119.0 , 144.0 ])
cells_spline = CubicSpline(xm, cell_br1)
y_cells = cells_spline(x_s)
miu_1 = spline(cell_br1)
miu_2 = spline(cell_br2)
x = (x_s, y_cells)#, y_glucose) #Inputs (3)
y = (miu_1) #Output (2)
b.learn(x,y) # train
xp = np.linspace(0,144,99)
yp = b.think(x) # validate
yyp = np.array(yp)
miu = np.reshape(yyp, (99,))
plot1 = plt.figure(3)
plt.plot(xp,miu,'r-', label = 'Predicted ')
plt.plot(x_s,miu_1,'bo', label = 'Experimental points')
plt.xlabel('Time [hr]')
plt.ylabel('miu [1/h]')
plt.legend()
plt.show()
Although the solver finds a solution, it is constant, which indicates that the solver is not actually working. My output is the following:
Can someone please help? I can't find what is failing. Thanks
Here are a couple of issues with your current approach:
- The training uses two inputs while the validation uses only one input.
- The data is not scaled. It generally helps to scale the data to -1 to 1. I included a simple scaling, but there are better ways to do this that also zero-center the data (see the sketch below).
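For example, a zero-centered min-max scaling could be a small helper like this (a sketch of that idea, not part of the fix below):
import numpy as np
def scale_sym(v):
    # map v linearly onto [-1, 1], centered on the midpoint of its range
    v = np.asarray(v, dtype=float)
    return 2.0 * (v - v.min()) / (v.max() - v.min()) - 1.0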
from gekko import brain
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
from numpy import diff
from scipy.interpolate import CubicSpline
xm = np.array([ 0.0 , 23.0 , 47.0 , 49.0 ,\
71.5 , 95.0 , 119.0 , 143.0 ])
def spline(cell):
    m = GEKKO()
    m.options.IMODE = 2
    c = [m.FV(value=0) for i in range(4)]
    x = m.Param(value=xm)
    cell = np.array(cell)
    y = m.CV(value=cell)
    y.FSTATUS = 1
    # polynomial model
    m.Equation(y == c[0] + c[1]*x + c[2]*x**2 + c[3]*x**3)
    c[0].STATUS = 1
    m.solve(disp=False)
    c[1].STATUS = 1
    m.solve(disp=False)
    c[2].STATUS = 1
    c[3].STATUS = 1
    m.solve(disp=False)
    pbr = [c[3].value[0], c[2].value[0],
           c[1].value[0], c[0].value[0]]
    print(pbr)
    xp = np.linspace(0, 144, 100)
    plot1 = plt.figure(1)
    if cell[0] == cell_br2[0]:
        plt.plot(xm, cell_br2, 'ko', label='BR2')
        plt.plot(xp, np.polyval(pbr, xp), 'g:', linewidth=2)
    elif cell[0] == cell_br1[0]:
        plt.plot(xm, cell_br1, 'mo', label='BR1')
        plt.plot(xp, np.polyval(pbr, xp), 'r:', linewidth=2)
    plt.xlabel('time(hr)')
    plt.ylabel('cells')
    plt.legend()
    dx = diff(xp)
    dy1 = diff(np.polyval(pbr, xp))
    deriv1 = dy1/dx
    time = np.linspace(0, 144, 99)
    plot1 = plt.figure(2)
    if cell[0] == cell_br2[0]:
        plt.plot(time, deriv1, 'b:', linewidth=2, label='BR2')
    elif cell[0] == cell_br1[0]:
        plt.plot(time, deriv1, 'm:', linewidth=2, label='BR1')
    plt.xlabel('time(hr)')
    plt.ylabel('miu(1/h)')
    plt.legend()
    #plt.show()
    return deriv1
cell_br1 = np.array([0.63*10**6 , 1.10*10**6, 2.06*10**6, 2.08*10**6,\
3.73*10**6, 3.89*10**6, 3.47*10**6,2.312*10**6])
cell_br2= np.array([0.58*10**6 , 0.96*10**6, 2.07*10**6, 1.79*10**6,\
3.57*10**6, 3.34*10**6, 2.62*10**6, 1.75*10**6])
b = brain.Brain(remote=True)
b.input_layer(1)
b.layer(linear=1)
b.layer(tanh=4)
b.layer(linear=1)
b.output_layer(1)
x_s = np.linspace(0,144,99)
xg = np.array([ 0.0 , 23.0 , 47.0 , 49.0 , 71.5 ,\
95.0 , 119.0 , 144.0 ])
cells_spline = CubicSpline(xm, cell_br1)
y_cells = cells_spline(x_s)
miu_1 = spline(cell_br1)
miu_2 = spline(cell_br2)
scale = [1.0e6,1.0e4]
x = (y_cells/scale[0]) #, y_glucose) #Inputs (3)
y = (miu_1/scale[1]) #Output (2)
b.learn(x,y) # train
yp = b.think(x) # validate
xp = np.linspace(0,144,99)
yyp = np.array(yp)
miu = np.reshape(yyp, (99,))
plot1 = plt.figure(3)
plt.plot(xp,miu*scale[1],'r-', label = 'Predicted ')
plt.plot(x_s,miu_1,'bo', label = 'Experimental points')
plt.xlabel('Time [hr]')
plt.ylabel('miu [1/h]')
plt.legend()
plt.show()
Recommendations:
- Adjust the number of nodes and types of layers.
- Use a package such as Keras or PyTorch for this type of problem; here is a tutorial on Keras, and a rough sketch follows after this list. Gekko is especially good at problems that need extra things such as constraints, non-standard activation functions, and hybrid machine learning where the model is a combination of physics-based and empirical elements.
- Gekko uses gradient-based solvers that may get stuck at local minima.
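To illustrate the second recommendation, a rough Keras sketch of the same one-input regression could look like this (layer sizes are illustrative; x, y, and scale are the arrays from the code above):
from tensorflow import keras
model = keras.Sequential([
    keras.layers.Dense(8, activation='tanh', input_shape=(1,)),
    keras.layers.Dense(8, activation='tanh'),
    keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mse')
model.fit(x.reshape(-1, 1), y, epochs=500, verbose=0)
miu_keras = model.predict(x.reshape(-1, 1)).ravel() * scale[1]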
There is an exponential truncated power law equation in the article below:
Gonzalez, M. C., Hidalgo, C. A., & Barabasi, A. L. (2008). Understanding individual human mobility patterns. Nature, 453(7196), 779-782.
It looks like this:
P(rg) = (rg + rg0)^(-beta) * exp(-rg / K)
There are three parameters to be estimated: rg0, beta, and K. We have collected several users' radii of gyration (rg) and uploaded the data to GitHub: radius of gyrations.txt
The following code can be used to read the data and calculate P(rg):
import numpy as np
# read radius of gyration from file
rg = []
with open('/path-to-the-data/radius of gyrations.txt', 'r') as f:
    for i in f:
        rg.append(float(i.strip('\n')))
# calculate P(rg)
rg = sorted(rg, reverse=True)
rg = np.array(rg)
prg = np.arange(len(rg)) / float(len(rg) - 1)
Or you can directly get the rg and prg data as follows:
rg = np.array([ 20.7863444 , 9.40547933, 8.70934714, 8.62690145,
7.16978087, 7.02575052, 6.45280959, 6.44755478,
5.16630287, 5.16092884, 5.15618737, 5.05610068,
4.87023561, 4.66753197, 4.41807645, 4.2635671 ,
3.54454372, 2.7087178 , 2.39016885, 1.9483156 ,
1.78393238, 1.75432688, 1.12789787, 1.02098332,
0.92653501, 0.32586582, 0.1514813 , 0.09722761,
0. , 0. ])
prg = np.array([ 0. , 0.03448276, 0.06896552, 0.10344828, 0.13793103,
0.17241379, 0.20689655, 0.24137931, 0.27586207, 0.31034483,
0.34482759, 0.37931034, 0.4137931 , 0.44827586, 0.48275862,
0.51724138, 0.55172414, 0.5862069 , 0.62068966, 0.65517241,
0.68965517, 0.72413793, 0.75862069, 0.79310345, 0.82758621,
0.86206897, 0.89655172, 0.93103448, 0.96551724, 1. ])
I can plot P(r_g) against r_g using the following Python script:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(rg, prg, 'bs', alpha = 0.3)
# roughly estimated params:
# rg0=1.8, beta=0.15, K=5
plt.plot(rg, (rg+1.8)**-.15*np.exp(-rg/5))
plt.yscale('log')
plt.xscale('log')
plt.xlabel('$r_g$', fontsize = 20)
plt.ylabel('$P(r_g)$', fontsize = 20)
plt.show()
How can I use these rg data to estimate the three parameters above? I hope to solve it using Python.
Following @Michael's suggestion, we can solve the problem using scipy.optimize.curve_fit:
def func(rg, rg0, beta, K):
    return (rg + rg0) ** (-beta) * np.exp(-rg / K)

from scipy import optimize
popt, pcov = optimize.curve_fit(func, rg, prg, p0=[1.8, 0.15, 5])
print(popt)
print(pcov)
The results are given below:
[ 1.04303608e+03 3.02058550e-03 4.85784945e+00]
[[ 1.38243336e+18 -6.14278286e+11 -1.14784675e+11]
[ -6.14278286e+11 2.72951900e+05 5.10040746e+04]
[ -1.14784675e+11 5.10040746e+04 9.53072925e+03]]
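One extra check (not in the original): the parameter standard errors follow from the diagonal of pcov, and the very large entries above already hint that the fit is poorly constrained.
perr = np.sqrt(np.diag(pcov))
print(perr)  # large standard errors = poorly constrained parameters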
Then we can inspect the results by plotting the fitted curve.
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(rg, prg, 'bs', alpha = 0.3)
plt.plot(rg, (rg+popt[0])**-(popt[1])*np.exp(-rg/popt[2]) )
plt.yscale('log')
plt.xscale('log')
plt.xlabel('$r_g$', fontsize = 20)
plt.ylabel('$P(r_g)$', fontsize = 20)
plt.show()
I'd like to implement a hierarchical Bayesian model with PyMC3. Before designing a complex model, I'm trying to get accustomed to PyMC3 by implementing Bayesian PCA and comparing the results with sklearn.decomposition.pca.
In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# Generate data
nsamp_cl = 1000 #Number of samples per class and per site
cov = np.matrix([[1, 0.9, 0.05],
[0.9, 1, 0.05],
[0.05, 0.05, 1]])
nfeat = cov.shape[0] #Number of features
X0 = np.random.multivariate_normal(np.zeros(nfeat),cov,nsamp_cl)
X1 = np.random.multivariate_normal(np.zeros(nfeat),cov,nsamp_cl)
# Rotate class 1
theta = np.radians(90)
cos, sin = np.cos(theta), np.sin(theta)
R = np.matrix('{} {}; {} {}'.format(cos, -sin, sin, cos))
X1[:,0:2] = np.dot(X1[:,0:2],R.T)
X = np.concatenate([X0,X1])
Y = np.concatenate([np.zeros(X0.shape[0]),np.ones(X1.shape[0])])
n = X.shape[0]
d = X.shape[1]
In [2]:
plt.figure()
cols = ['b','r']
colors = [cols[y.astype(int)] for y in Y ]
plt.scatter(X[:,0],X[:,1],20,colors)
plt.title('Features 0 and 1')
plt.figure()
cols = ['b','r']
colors = [cols[y.astype(int)] for y in Y ]
plt.scatter(X[:,1],X[:,2],20,colors)
plt.title('Features 1 and 2')
Out [2]:
In [3]:
from pymc3 import Model, Normal, Gamma, math, variational

common_latent_model = Model()
# Building a latent model to extract site-robust principal components
with common_latent_model:
    n_latent = 3
    # ARD prior
    alphas = Gamma('alphas', alpha=1e-6, beta=1e-6, shape=n_latent)
    # Weight vector
    w = Normal('w', mu=0, tau=alphas, shape=(d, n_latent))
    # Latent space
    z = Normal('z', mu=0, tau=1, shape=(n, n_latent))
    # Multiply latent variables by W to go from latent to observation space
    t = math.dot(z, w.T)
    # Add bias
    mu = Normal('mu', mu=0, tau=0.01, shape=d)
    u = t + mu
    # Precision of the observation
    sigma = Gamma('sigma', alpha=1e-6, beta=1e-6, shape=1)
    # Likelihood (sampling distribution) of observations
    X_obs = Normal('X_obs', mu=u, tau=sigma, observed=X)

with common_latent_model:
    means, sds, elbos = variational.advi(n=10000, learning_rate=0.1, accurate_elbo=True)  #100000)

plt.plot(elbos)
plt.ylabel('ELBO')
plt.xlabel('iteration')
In [4]:
for key in means:
    print("key: %s , value: %s" % (key, means[key]))
key: mu , value: [ 0.03288066 -0.05347487 0.00260641]
key: alphas_log_ , value: [ 6.94631195 6.85621834 6.84792233]
key: sigma_log_ , value: [-0.009662]
key: z , value: [[-0.01260083 -0.00460729 -0.01360558]
[-0.02817471 0.04281501 0.01643355]
[-0.05178572 -0.02470609 -0.05092171]
...,
[-0.05201711 0.00150599 -0.01167801]
[-0.01097088 -0.02666511 0.03660954]
[ 0.0609949 0.01156182 0.01814843]]
key: w , value: [[-0.06004834 0.00599346 -0.03071374]
[ 0.00668656 -0.01306511 0.00400904]
[-0.00141243 -0.00778869 0.03257137]]
In [5]:
PC_bayes = means['z']
plt.figure()
cols = ['b','r']
colors = [cols[y.astype(int)] for y in Y ]
plt.scatter(PC_bayes[:,0],PC_bayes[:,1],20,colors,alpha=.1)
Out [5]:
In [6]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(X)
PC = pca.transform(X)
In [7]:
plt.figure()
cols = ['b','r']
colors = [cols[y.astype(int)] for y in Y ]
plt.scatter(PC[:,0],PC[:,1],20,colors,alpha=.1)
Out [7]:
(You can find the iPython Notebook here:
https://github.com/peppeFarAway/pymc3/blob/master/BayesPCA.ipynb)
Why can't my Bayes PCA implementation recover the Principal Components, while sklearn.decomposition.pca can? Where am I making a mistake?
The main reference I used to implement the model is:
https://blogs.msdn.microsoft.com/infernet_team_blog/2011/09/30/bayesian-pca/
Does anyone know how to create a plot like the one below using Python? I would like to know how to plot the straight lines and the shaded area.
Hypervolume + Surface Attainment plot:
Both hypervolume and surface attainment are used as performance indicators for a multi-objective optimisation algorithm.
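For context, here is a toy sketch (mine, not from the original post) of what the hypervolume indicator measures for a 2-D minimisation front relative to a reference point; it assumes the points are mutually non-dominated:
def hypervolume_2d(front, ref):
    # front: list of (f1, f2) points, mutually non-dominated (minimisation)
    # ref: reference point dominated by every point of the front
    pts = sorted(front)  # ascending in f1 implies descending in f2
    hv, prev_f2 = 0.0, ref[1]
    for f1, f2 in pts:
        hv += (ref[0] - f1) * (prev_f2 - f2)  # horizontal slab per step
        prev_f2 = f2
    return hv
print(hypervolume_2d([(0, 2), (1, 0)], ref=(2, 3)))  # 4.0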
I managed to figure this out by myself in the end. Here is my approach (it compares the performance of the multi-objective algorithm before and after a certain change):
import pandas as pd
import matplotlib.pyplot as plt

cases = ['before', 'after']
objectives = ['obj1', 'obj2']
colors = ['green', 'red']
data = {case: pd.read_csv(case + '.csv', index_col=False) for case in cases}
fig, axes = plt.subplots()
for j, case in enumerate(cases):
    obj1 = sorted(data[case][objectives[0]].values)
    obj2 = sorted(data[case][objectives[1]].values)
    # Plot the first two points outside the inner loop to display the label
    axes.plot([obj1[0], obj1[0]], [obj2[0], obj2[1]],
              color=colors[j], label=case)
    axes.plot([obj1[0], obj1[1]], [obj2[1], obj2[1]],
              color=colors[j])
    for i in range(2, len(obj1)):
        axes.plot([obj1[i - 1], obj1[i - 1]], [obj2[i - 1], obj2[i]],
                  color=colors[j])
        axes.plot([obj1[i - 1], obj1[i]], [obj2[i], obj2[i]],
                  color=colors[j])
axes.set_xlabel(objectives[0])
axes.set_ylabel(objectives[1])
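# Note: nadir_r and ideal_r are not defined in this snippet; they are
# assumed to be the worst and best points of the two objectives,
# computed elsewhere (e.g. from the union of both datasets).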
axes.plot(nadir_r[0], nadir_r[1], '*', label='nadir')
axes.plot(ideal_r[0], ideal_r[1], 'x', label='ideal')
plt.xlim(nadir_r[0], ideal_r[0])
plt.ylim(ideal_r[1], nadir_r[1])
axes.legend(loc='best')
This would produce the following plot (which shows that the "change" had a negative impact).
Any constructive criticism is welcome.
Try the function below.
It produces this:
import numpy as np
import matplotlib.pyplot as plt
def plot_hyper_volume(x, y, reference_point):
    x = np.array(x)
    y = np.array(y)

    # Zip x and y into a numpy ndarray
    coordinates = np.array(sorted(zip(x, y)))

    # Empty pareto set
    pareto_set = np.full(coordinates.shape, np.inf)

    i = 0
    for point in coordinates:
        if i == 0:
            pareto_set[i] = point
            i += 1
        elif point[1] < pareto_set[:, 1].min():
            pareto_set[i] = point
            i += 1

    # Get rid of unused spaces
    pareto_set = pareto_set[:i + 1, :]

    # Add reference point to the pareto set
    pareto_set[i] = reference_point

    # These points will define the path to be plotted and filled
    x_path_of_points = []
    y_path_of_points = []

    for index, point in enumerate(pareto_set):
        if index < i - 1:
            plt.plot([point[0], point[0]], [point[1], pareto_set[index + 1][1]],
                     marker='o', markersize=4, c='#4270b6', mfc='black', mec='black')
            plt.plot([point[0], pareto_set[index + 1][0]], [pareto_set[index + 1][1], pareto_set[index + 1][1]],
                     marker='o', markersize=4, c='#4270b6', mfc='black', mec='black')
            x_path_of_points += [point[0], point[0], pareto_set[index + 1][0]]
            y_path_of_points += [point[1], pareto_set[index + 1][1], pareto_set[index + 1][1]]

    # Link 1 to Reference Point
    plt.plot([pareto_set[0][0], reference_point[0]], [pareto_set[0][1], reference_point[1]],
             marker='o', markersize=4, c='#4270b6', mfc='black', mec='black')
    # Link 2 to Reference Point
    plt.plot([pareto_set[-1][0], reference_point[0]], [pareto_set[-2][1], reference_point[1]],
             marker='o', markersize=4, c='#4270b6', mfc='black', mec='black')

    # Highlight the Reference Point
    plt.plot(reference_point[0], reference_point[1], 'o', color='red', markersize=8)

    # Fill the area between the Pareto set and Ref y
    plt.fill_betweenx(y_path_of_points, x_path_of_points,
                      max(x_path_of_points) * np.ones(len(x_path_of_points)),
                      color='#dfeaff', alpha=1)

    plt.xlabel(r"$f_{\mathrm{1}}(x)$", fontsize=18)
    plt.ylabel(r"$f_{\mathrm{2}}(x)$", fontsize=18)

    plt.tight_layout()
    plt.show()
x = [3.237887527110896, 3.253687689154591, 2.6652460044515833, 4.457925635186012, 3.2080041315472436,
3.196941509672105, 3.1489091846746784, 3.3727329491336504, 3.2980773800599184, 3.330938419470396,
3.0504985412687606, 3.9698249482752517, 3.1229599570521005, 3.1278496698518365, 3.3410372676796367,
3.267339581661665, 3.1531143837247226, 3.13276172330508, 3.2136444869087857, 3.123114522218978,
2.8975316624890177, 3.855654194881272, 2.982889291813081, 4.001132534228973, 3.222172022988187,
3.2918121975763426, 3.119437722697402, 3.1915652020314855, 3.228511161109151, 3.410632525789594,
3.303983909300615, 3.23152665486737, 3.12295981695552, 3.123114522218978, 3.2134999576177727, 3.3042667387038263,
3.379569640868453, 3.2597834943233255, 3.2365405477218783, 3.2911687133624765, 3.1835704013006616,
3.1363291696973903, 3.1422814239459718, 3.1202417240558282, 3.1311337111075535, 3.630375287962374,
3.181708872213033, 3.2993090610457774, 3.130988434129236, 3.12295981695552]
y = [3.5471650938674886, 3.446106995806536, 4.203717903464938, 2.53919767967234, 3.550936505497275, 3.553107544090778,
3.648527616343033, 3.3076630507066875, 3.3498580074252184, 3.320019542824605, 3.752735495100324,
2.853419245618656, 3.71421025754152, 3.709380479951111, 3.307305266656153, 3.5468875845765266, 3.7140635724580386,
3.6981877554394647, 3.6799057516573295, 3.714041693123077, 3.831023904192475, 2.873083146950819,
4.195177914971685, 2.8302302203075165, 3.4285829711629616, 3.3624540805968035, 3.7156683374998387,
3.5623734317415163, 3.627757758118092, 3.2755855532302336, 3.318743777730811, 3.7213811168338164,
3.714210410334474, 3.714041693123077, 3.45357840149967, 3.6337156456167627, 3.270784928858892, 3.400665041601096,
3.5451613263457076, 3.357372990242752, 3.5705676117602683, 3.6930983812240736, 3.687202266647831,
3.717332517575802, 3.7061199284167357, 3.1769420991200708, 3.492240477791187, 3.512518414215774,
3.7040103293332383, 3.714210410334474]
ref = [max(x), max(y)]
plot_hyper_volume(x=x, y=y, reference_point=ref)