Need help on plotting this piecewise linear interpolation code in Python - python

I am trying my hand on interpolating data using a simple linear function, Lagrange Interpolating Polynomial. I have managed to get the required equations, however, I am not able to figure out how to plot it piece-wise. I do understand using sympy is not the best way forward, but I am a noob and I wanted to see how my equations look.
How can I make it plot in matplotlib without having the need to manually type the equations at the end?
import numpy as np
import matplotlib.pyplot as plt
import sympy as sym
x = sym.Symbol('x')
year = np.arange(1960,2020,10)
pop = [179323,203302,226542,249633,281422,308746]
def lgn(a,b): #this ideally should be taking a value of x where you'd like to interpolate.
result = []
for i in range(1,len(a)):
L0 = (x - a[i])/(a[i-1] - a[i])
L1 = (x - a[i-1])/(a[i] - a[i-1])
temp = (L0 * b[i-1]) + (L1 * b[i])
result.append(temp)
return result
lgn(year,pop) #result gives a list of linear equations between each year value.
[23979*x/10 - 4520561,
2324*x - 4374978,
23091*x/10 - 4345476,
31789*x/10 - 6076378,
13662*x/5 - 5183378]
#plotting for each interval. this is what I am trying to code.
x1 = np.linspace(year[0],year[1],10)
y1 = 23979 * x1/10 - 4520561
x2 = np.linspace(year[1],year[2],10)
y2 = 2324*x2 - 4374978
x3 = np.linspace(year[2],year[3],10)
y3 = 23091*x3/10 - 4345476
x4 = np.linspace(year[3],year[4],10)
y4 = 31789*x4/10 - 6076378
x5 = np.linspace(year[4],year[5],10)
y5 = 13662*x5/5 - 5183378
plt.plot(year,pop,'ro',x1,y1,x2,y2,x3,y3,x4,y4,x5,y5 )

To convert a sympy expression to a numerical value with expr.subs(x, 123).evalf(). Note that this only works for substituting a single value. To work with arrays, sym.lambdify() can convert the expression to a function that understands numpy arrays, which then can be used for plotting.
Here is some example code:
import numpy as np
import matplotlib.pyplot as plt
import sympy as sym
def lgn(a, b):
result = []
for i in range(1, len(a)):
L0 = (x - a[i]) / (a[i - 1] - a[i])
L1 = (x - a[i - 1]) / (a[i] - a[i - 1])
temp = (L0 * b[i - 1]) + (L1 * b[i])
result.append(temp)
return result
x = sym.Symbol('x')
year = np.arange(1960, 2020, 10)
pop = [179323, 203302, 226542, 249633, 281422, 308746]
equations = lgn(year, pop)
for i in range(1, len(year)):
xi = np.linspace(year[i - 1], year[i], 10)
yi_np = sym.lambdify(x, equations[i - 1])
yi = yi_np(xi)
plt.plot(xi, yi)
plt.plot(year, pop, 'ro')
plt.show()
Here is an approach using sympy's Piecewise:
import numpy as np
import matplotlib.pyplot as plt
import sympy as sym
def line_2points(a0, b0, a1, b1):
L0 = (x - a1) / (a0 - a1)
L1 = (x - a0) / (a1 - a0)
return L0 * b0 + L1 * b1
x = sym.Symbol('x')
year = np.arange(1960, 2020, 10)
pop = [179323, 203302, 226542, 249633, 281422, 308746]
eq = 0
for i in range(1, len(year)):
eq = sym.Piecewise( (line_2points(year[i-1], pop[i-1], year[i], pop[i]), (x >= year[i-1] ) & (x <= year[i] ) ),
(eq, True) )
# sym.plot(eq, (x, year[0], year[-1])) # this also works, but the visualization is much harder to customize
eq_np = sym.lambdify(x, eq)
xs = np.linspace(year[0], year[-1], 200)
plt.plot(xs, eq_np(xs))
plt.plot(year, pop, 'ro')
plt.show()

Related

Fitting curve with conditions

I'm trying to simulate an exoplanet transit and to determine its orbital characteristics with curve fitting. However, the intersection area between two circles needs to distinguish two cases: if the center of the smallest circle is in the biggest or not. This is a problem for scipy with the function curve_fit, calling an array in my function cacl_aire. The function transit simulates the smallest disc's evolution with time.
Here's my code:
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
import xlrd
dt = 0.1
Vx = 0.08
Vy = 0
X0 = -5
Y0 = 0
R = 2
r = 0.7
X = X0
Y = Y0
doc = xlrd.open_workbook("transit data.xlsx")
feuille_1 = doc.sheet_by_index(0)
mag = [feuille_1.cell_value(rowx=k, colx=4) for k in range(115)]
T = [feuille_1.cell_value(rowx=k, colx=3) for k in range(115)]
def calc_aire(r, x, y):
D2 = x * x + y * y
if D2 >= (r + R)**2:
return 0
d = (r**2 - R**2 + D2) / (2 * (D2**0.5))
d2 = D2**0.5 - d
if abs(d) >= r:
return min([r * r * np.pi, R * R * np.pi])
H = (r * r - d * d)**0.5
As = np.arccos(d / r) * r * r - d * H
As2 = R * R * np.arccos(d2 / R) - d2 * H
return As + As2
def transit(t, r, X0, Y0, Vx, Vy):
return -calc_aire(r, X0 + Vx * t, Y0 + Vy * t)
best_vals = curve_fit(transit, T, mag)[0]
print('best_vals: {}'.format(best_vals))
plt.figure()
plt.plot(T, mag)
plt.draw()
I have the following error :
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all() with the line 28 :
if D2 >= (r + R)**2:
Here is my database:
https://drive.google.com/file/d/1SP12rrHGjjpHfKBQ0l3nVMJDIRCPlkuf/view?usp=sharing
I don't see any trick to solve my problem.

Transport equation in 1D (python)

I'm trying to write a python program to solve the convection equation in 1D using the finite differences method (upwind scheme). The problem is as follows:
Here's what I've attempted
from numpy import *
from numpy.linalg import *
from matplotlib.pyplot import *
def u0(x):
if (0.4 <= x <= 0.5):
y = 10*(x - 0.4)
elif (0.5 <= x <= 0.6):
y = 10*(0.6 - x)
else:
y = 0
return y
print('Choix de la vitesse de transport c : ')
c = float(input('c = '))
def solex(x, t):
return u0(x - c*t)
print('Choix de pas h : ')
h = float(input('h = '))
print('Choix du pas dt et du temps final T : ')
dt = float(input('dt = '))
T = float(input('T = '))
# Maillage
N = int((1/h) - 1)
x = linspace(0, 1, N + 2)
M = int((T/dt) - 1)
t = linspace(0, T, M + 2)
# Itération
U1 = zeros(N)
U2 = zeros(N)
sol = zeros((N, M + 2))
for i in range(1, N + 1):
U1[i - 1] = u0(x[i])
sol[:, 0] = U1
for j in range(1, size(t)):
for i in range(1, N-1):
U2[i] = U1[i] - c*(dt/h)*(U1[i] - U1[i - 1])
sol[:, j] = U2
U1 = U2
It doesn't seem to work and I don't know why
Though you said you already solved your problem, I would still like to suggest some general improvements:
wildcard imports like from numpy import * are considered bad practice, better use import numpy as np and refer to the necessary functions as np.linspace etc.
the power of numpy comes from vectorization, so try to replace as much for-loops as possible by vectorized operations.
at least from what you showed us, the variables U1 and U2 are not really necessary.
using input for every single parameter might be overkill
The following code includes my suggested improvements. Note how I replaced your u0 with a vectorized version using np.piecewise and replaced several for-loops. I also added a visualisation.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def u0(x):
y= np.piecewise(
x,
[(0.4 <= x)&(x <= 0.5), (0.5 <= x)&(x<= 0.6) ],
[lambda x: 10*(x - 0.4), lambda x: 10*(0.6 - x), 0])
return y
c = 0.9
h = 0.01
dt = 0.01
T = 2
N = int(np.ceil(1/h))
x = np.linspace(0, 1, N)
M = int(np.ceil(T/dt))
t = np.linspace(0, T, M)
#solve with upwind scheme
sol = np.zeros((N, M))
sol[:,0] = u0(x)
#you could add boundary values here by setting
#sol[0,:] = <your_boundary_data>
for i in range(1,len(t)):
sol[1:,i] = sol[1:,i-1] - c*(dt/h)*(sol[1:,i-1] - sol[:-1,i-1])
#Visualization
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.set_xlabel('x')
ax.set_ylabel('t')
T, X = np.meshgrid(t, x)
surf = ax.plot_surface(X, T, sol)

Still having trouble with curve fitting

I already opened a question on this topic, but I wasn't sure, if I should post it there, so I opened a new question here.
I have trouble again when fitting two or more peaks. First problem occurs with a calculated example function.
xg = np.random.uniform(0,1000,500)
mu1 = 200
sigma1 = 20
I1 = -2
mu2 = 800
sigma2 = 20
I2 = -1
yg3 = 0.0001*xg
yg1 = (I1 / (sigma1 * np.sqrt(2 * np.pi))) * np.exp( - (xg - mu1)**2 / (2 * sigma1**2) )
yg2 = (I2 / (sigma2 * np.sqrt(2 * np.pi))) * np.exp( - (xg - mu2)**2 / (2 * sigma2**2) )
yg=yg1+yg2+yg3
plt.figure(0, figsize=(8,8))
plt.plot(xg, yg, 'r.')
I tried two different approaches, I found in the documentation, which are shown below (modified for my data), but both give me wrong fitting data and a messy chaos of graphs (I guess one line per fitting step).
1st attempt:
import numpy as np
from lmfit.models import PseudoVoigtModel, LinearModel, GaussianModel, LorentzianModel
import sys
import matplotlib.pyplot as plt
gauss1 = PseudoVoigtModel(prefix='g1_')
pars.update(gauss1.make_params())
pars['g1_center'].set(200)
pars['g1_sigma'].set(15, min=3)
pars['g1_amplitude'].set(-0.5)
pars['g1_fwhm'].set(20, vary=True)
#pars['g1_fraction'].set(0, vary=True)
gauss2 = PseudoVoigtModel(prefix='g2_')
pars.update(gauss2.make_params())
pars['g2_center'].set(800)
pars['g2_sigma'].set(15)
pars['g2_amplitude'].set(-0.4)
pars['g2_fwhm'].set(20, vary=True)
#pars['g2_fraction'].set(0, vary=True)
mod = gauss1 + gauss2 + LinearModel()
pars.add('intercept', value=0, vary=True)
pars.add('slope', value=0.0001, vary=True)
init = mod.eval(pars, x=xg)
out = mod.fit(yg, pars, x=xg)
print(out.fit_report(min_correl=0.5))
plt.figure(5, figsize=(8,8))
out.plot_fit()
When I include the 'fraction'-parameter, I often get
'NameError: name 'pv1_fraction' is not defined in expr='<_ast.Module object at 0x00000000165E03C8>'.
although it should be defined. I get this Error for real data with this approach, too.
2nd attempt:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import lmfit
def gauss(x, sigma, mu, A):
return A*np.exp(-(x-mu)**2/(2*sigma**2))
def linear(x, m, n):
return m*x + n
peak1 = lmfit.model.Model(gauss, prefix='p1_')
peak2 = lmfit.model.Model(gauss, prefix='p2_')
lin = lmfit.model.Model(linear, prefix='l_')
model = peak1 + lin + peak2
params = model.make_params()
params['p1_mu'] = lmfit.Parameter(value=200, min=100, max=250)
params['p2_mu'] = lmfit.Parameter(value=800, min=100, max=1000)
params['p1_sigma'] = lmfit.Parameter(value=15, min=0.01)
params['p2_sigma'] = lmfit.Parameter(value=20, min=0.01)
params['p1_A'] = lmfit.Parameter(value=-2, min=-3)
params['p2_A'] = lmfit.Parameter(value=-2, min=-3)
params['l_m'] = lmfit.Parameter(value=0)
params['l_n'] = lmfit.Parameter(value=0)
out = model.fit(yg, params, x=xg)
print out.fit_report()
plt.figure(8, figsize=(8,8))
out.plot_fit()
So the result looks like this, in both cases. It seems to plot all fitting attempts, but never solves it correctly. The best fitted parameters are in the range that I gave it.
Anyone knows this type of error? Or has any solutions for this? And does anyone know how to avoid the NameError when calling a model function from lmfit with those approaches?
I have a somewhat tolerable solution for you. Since I don't know how variable your data is, I cannot say that it will work in a general sense but should get you started. If your data is along 0-1000 and has two peaks or dips along a line as you showed, then it should work.
I used the scipy curve_fit and put all of the components of the function together into one function. One can pass starting locations into the curve_fit function. (you can probably do this with the lib you're using but I'm not familiar with it) There is a loop in loop where I vary the mu parameters to find the ones with the lowest squared error. If you are needing to fit your data many times or in some real-time scenario then this is not for you but if you just need to fit some data, launch this code and grab a coffee.
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt
import pylab
from matplotlib import cm as cm
import time
def my_function_big(x, m, n, #lin vars
sigma1, mu1, I1, #gaussian 1
sigma2, mu2, I2): #gaussian 2
y = m * x + n + (I1 / (sigma1 * np.sqrt(2 * np.pi))) * np.exp( - (x - mu1)**2 / (2 * sigma1**2) ) + (I2 / (sigma2 * np.sqrt(2 * np.pi))) * np.exp( - (x - mu2)**2 / (2 * sigma2**2) )
return y
#make some data
xs = np.random.uniform(0,1000,500)
mu1 = 200
sigma1 = 20
I1 = -2
mu2 = 800
sigma2 = 20
I2 = -1
yg3 = 0.0001 * xs
yg1 = (I1 / (sigma1 * np.sqrt(2 * np.pi))) * np.exp( - (xs - mu1)**2 / (2 * sigma1**2) )
yg2 = (I2 / (sigma2 * np.sqrt(2 * np.pi))) * np.exp( - (xs - mu2)**2 / (2 * sigma2**2) )
ys = yg1 + yg2 + yg3
xs = np.array(xs)
ys = np.array(ys)
#done making data
#start a double loop...very expensive but this is quick and dirty
#it would seem that the regular optimizer has trouble finding the minima so i
#found that having the near proper mu values helped it zero in much better
start = time.time()
serr = []
_x = []
_y = []
for x in np.linspace(0, 1000, 61):
for y in np.linspace(0, 1000, 61):
cfiti = curve_fit(my_function_big, xs, ys, p0=[0, 0, 1, x, 1, 1, y, 1], maxfev=20000000)
serr.append(np.sum((my_function_big(xs, *cfiti[0]) - ys) ** 2))
_x.append(x)
_y.append(y)
serr = np.array(serr)
_x = np.array(_x)
_y = np.array(_y)
print 'done loop in loop fitting'
print 'time: %0.1f' % (time.time() - start)
gridsize=20
plt.subplot(111)
plt.hexbin(_x, _y, C=serr, gridsize=gridsize, cmap=cm.jet, bins=None)
plt.axis([_x.min(), _x.max(), _y.min(), _y.max()])
cb = plt.colorbar()
cb.set_label('SE')
plt.show()
ix = np.argmin(serr.ravel())
mustart1 = _x.ravel()[ix]
mustart2 = _y.ravel()[ix]
print mustart1
print mustart2
cfit = curve_fit(my_function_big, xs, ys, p0=[0, 0, 1, mustart1, 1, 1, mustart2, 1], maxfev=2000000000)
xp = np.linspace(0, 1000, 1001)
plt.figure()
plt.scatter(xs, ys) #plot synthetic dat
plt.plot(xp, my_function_big(xp, *cfit[0]), '-', label='fit function') #plot data evaluated along 0-1000
plt.legend(loc=3, numpoints=1, prop={'size':12})
plt.show()
pylab.close()
Good luck!
In your first attempt:
pars['g1_fraction'].set(0, vary=True)
The fraction must be a value between 0 and 1, but I believe that cannot be zero. Try to put something like 0.000001, and it will work.

how can I do a maximum likelihood regression using scipy.optimize.minimize

How can I do a maximum likelihood regression using scipy.optimize.minimize? I specifically want to use the minimize function here, because I have a complex model and need to add some constraints. I am currently trying a simple example using the following:
from scipy.optimize import minimize
def lik(parameters):
m = parameters[0]
b = parameters[1]
sigma = parameters[2]
for i in np.arange(0, len(x)):
y_exp = m * x + b
L = sum(np.log(sigma) + 0.5 * np.log(2 * np.pi) + (y - y_exp) ** 2 / (2 * sigma ** 2))
return L
x = [1,2,3,4,5]
y = [2,3,4,5,6]
lik_model = minimize(lik, np.array([1,1,1]), method='L-BFGS-B', options={'disp': True})
When I run this, convergence fails. Does anyone know what is wrong with my code?
The message I get running this is 'ABNORMAL_TERMINATION_IN_LNSRCH'. I am using the same algorithm that I have working using optim in R.
Thank you Aleksander. You were correct that my likelihood function was wrong, not the code. Using a formula I found on wikipedia I adjusted the code to:
import numpy as np
from scipy.optimize import minimize
def lik(parameters):
m = parameters[0]
b = parameters[1]
sigma = parameters[2]
for i in np.arange(0, len(x)):
y_exp = m * x + b
L = (len(x)/2 * np.log(2 * np.pi) + len(x)/2 * np.log(sigma ** 2) + 1 /
(2 * sigma ** 2) * sum((y - y_exp) ** 2))
return L
x = np.array([1,2,3,4,5])
y = np.array([2,5,8,11,14])
lik_model = minimize(lik, np.array([1,1,1]), method='L-BFGS-B')
plt.scatter(x,y)
plt.plot(x, lik_model['x'][0] * x + lik_model['x'][1])
plt.show()
Now it seems to be working.
Thanks for the help!

scipy.odeint strange behavior

Here is my code to solve differential equation dy / dt = 2 / sqrt(pi) * exp(-x * x) to plot erf(x).
import matplotlib.pyplot as plt
from scipy.integrate import odeint
import numpy as np
import math
def euler(df, f0, x):
h = x[1] - x[0]
y = [f0]
for i in xrange(len(x) - 1):
y.append(y[i] + h * df(y[i], x[i]))
return y
def i(df, f0, x):
h = x[1] - x[0]
y = [f0]
y.append(y[0] + h * df(y[0], x[0]))
for i in xrange(1, len(x) - 1):
fn = df(y[i], x[i])
fn1 = df(y[i - 1], x[i - 1])
y.append(y[i] + (3 * fn - fn1) * h / 2)
return y
if __name__ == "__main__":
df = lambda y, x: 2.0 / math.sqrt(math.pi) * math.exp(-x * x)
f0 = 0.0
x = np.linspace(-10.0, 10.0, 10000)
y1 = euler(df, f0, x)
y2 = i(df, f0, x)
y3 = odeint(df, f0, x)
plt.plot(x, y1, x, y2, x, y3)
plt.legend(["euler", "modified", "odeint"], loc='best')
plt.grid(True)
plt.show()
And here is a plot:
Am I using odeint in a wrong way or it's a bug?
Notice that if you change x to x = np.linspace(-5.0, 5.0, 10000), then your code works. Therefore, I suspect the problem has something to do with exp(-x*x) being too small when x is very small or very large. [Total speculation: Perhaps the odeint (lsoda) algorithm adapts its stepsize based on values sampled around x = -10 and increases the stepsize in such a way that values around x = 0 are missed?]
The code can be fixed by using the tcrit parameter, which tells odeint to pay special attention around certain critical points.
So, by setting
y3 = integrate.odeint(df, f0, x, tcrit = [0])
we tell odeint to sample more carefully around 0.
import matplotlib.pyplot as plt
import scipy.integrate as integrate
import numpy as np
import math
def euler(df, f0, x):
h = x[1] - x[0]
y = [f0]
for i in xrange(len(x) - 1):
y.append(y[i] + h * df(y[i], x[i]))
return y
def i(df, f0, x):
h = x[1] - x[0]
y = [f0]
y.append(y[0] + h * df(y[0], x[0]))
for i in xrange(1, len(x) - 1):
fn = df(y[i], x[i])
fn1 = df(y[i - 1], x[i - 1])
y.append(y[i] + (3 * fn - fn1) * h / 2)
return y
def df(y, x):
return 2.0 / np.sqrt(np.pi) * np.exp(-x * x)
if __name__ == "__main__":
f0 = 0.0
x = np.linspace(-10.0, 10.0, 10000)
y1 = euler(df, f0, x)
y2 = i(df, f0, x)
y3 = integrate.odeint(df, f0, x, tcrit = [0])
plt.plot(x, y1)
plt.plot(x, y2)
plt.plot(x, y3)
plt.legend(["euler", "modified", "odeint"], loc='best')
plt.grid(True)
plt.show()

Categories