Adjusting shape of a data array to perform optimization in SciPy - python

I have code which performs optimization to infer a parameter:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import root
from scipy.optimize import minimize
import pandas as pd

d = {'Week': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
     'incidence': [206.1705794, 2813.420201, 11827.9453, 30497.58655, 10757.66954,
                   7071.878779, 3046.752723, 1314.222882, 765.9763902, 201.3800578, 109.8982006]}
df = pd.DataFrame(data=d)

def peak_infections(beta, df):
    # Weeks for which the ODE system will be solved
    weeks = df.Week.to_numpy()
    # Total population, N.
    N = 100000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma (in 1/weeks).
    # The basic reproduction number is beta/gamma.
    gamma = 1/7 * 7  # rate should be in weeks now
    # A grid of time points
    t = np.linspace(0, weeks[-1], weeks[-1] + 1)

    # The SIR model differential equations (J tracks cumulative incidence).
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ

    # Integrate the SIR equations over the time grid, t,
    # starting from (S0, I0, R0, J0).
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T
    return I / N

def residual(x, df):
    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy() / N
    return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)

x0 = 0.5
res = minimize(residual, x0, args=(df,), method="Nelder-Mead").x
print(res)
However, it is not giving the correct values, so instead of indexing the weeks as 1, 2, 3, ... in the line d = {'Week': [1, 2, 3, ..., 11], 'incidence': [...]}, I'd like to use days, so Python has clearer information to work with. I'd like to slice the linspace of days at weekly intervals. However, I'm having some shape-alignment issues:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import root
from scipy.optimize import minimize
import pandas as pd

time = np.linspace(0, 77, 77 + 1)
d = {'Week': [time[7], time[14], time[21], time[28], time[35], time[42],
              time[49], time[56], time[63], time[70], time[77]],
     'incidence': [206.1705794, 2813.420201, 11827.9453, 30497.58655, 10757.66954,
                   7071.878779, 3046.752723, 1314.222882, 765.9763902, 201.3800578, 109.8982006]}
# previous version: d = {'Week': [1, 2, ..., 11], 'incidence': [...]}
df = pd.DataFrame(data=d)

def peak_infections(beta, df):
    # Weeks for which the ODE system will be solved
    weeks = df.Week.to_numpy()
    # Total population, N.
    N = 100000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma.
    # The basic reproduction number is beta/gamma.
    gamma = 1/7 * 7  # rate should be in weeks now
    # A grid of time points (in days)
    t = np.linspace(0, 77, 77 + 1)

    # The SIR model differential equations (J tracks cumulative incidence).
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ

    # Integrate the SIR equations over the time grid, t,
    # starting from (S0, I0, R0, J0).
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T
    return I / N

def residual(x, df):
    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy() / N
    return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)

x0 = 0.5
res = minimize(residual, x0, args=(df,), method="Nelder-Mead").x
print(res)
The approach I tried here was recreating the dataframe by slicing time, which spans 77 days, i.e. 11 weeks. It still raises the shape error (77 against 11 elements), inside my residual function, at the line return np.sum((peak_infections(x, df)[1:] - incidence) ** 2). Where is my approach going wrong?
-----------EDIT----------
Updated code:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import pandas as pd

t = np.arange(7, 84, 7)
d = {'Week': t,
     'incidence': [206.1705794, 2813.420201, 11827.9453, 30497.58655, 10757.66954,
                   7071.878779, 3046.752723, 1314.222882, 765.9763902, 201.3800578, 109.8982006]}
df = pd.DataFrame(data=d)

def peak_infections(beta, df):
    # Weeks for which the ODE system will be solved
    weeks = df.Week.to_numpy()
    # Total population, N.
    N = 100000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma.
    # The basic reproduction number is beta/gamma.
    gamma = 1/7 * 7  # rate should be in weeks now
    # A grid of weekly time points (days 7, 14, ..., 77), as suggested below
    t = np.arange(7, 84, 7)

    # The SIR model differential equations (J tracks cumulative incidence).
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ

    # Integrate the SIR equations over the time grid, t,
    # starting from (S0, I0, R0, J0).
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T
    return I / N

def residual(x, df):
    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy() / N
    return np.sum((peak_infections(x, df) - incidence) ** 2)

x0 = 0.5
res = minimize(residual, x0, args=(df,), method="Nelder-Mead").x
print(res)

Your problem occurs in your residual function, where peak_infections(x, df)[1:] yields 77 values while incidence has only 11, as you have mentioned.
This arises because you are solving your ODE over a grid t with 78 points inside peak_infections. To avoid this, generate a time vector with 11 weekly values in your peak_infections function, as follows:
t = np.linspace(0, 77, 77 + 1)
t = [t[7], t[14], t[21], t[28], t[35], t[42], t[49], t[56], t[63], t[70], t[77]]
or a completely new one as:
t = np.arange(7,84,7)
and change your residual function (don't slice peak_infections(x, df) with [1:]) as follows:
def residual(x, df):
    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy() / N
    return np.sum((peak_infections(x, df) - incidence) ** 2)
This will solve your problem: you are now comparing NumPy arrays with shapes (11,) and (11,), which will not produce an error.
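One subtlety to keep in mind (an addition, based on odeint's documented behaviour that the first row of the solution corresponds to the initial condition at t[0]): with t = np.arange(7, 84, 7), the state (S0, I0, R0, J0) is applied at day 7 rather than day 0. A minimal sketch of an alternative peak_infections that keeps day 0 on the grid and then drops it, keeping the question's constants and gamma (though note that if t is now in days, you may also want gamma = 1/7 per day rather than 1 per week):

def peak_infections(beta, df):
    weeks = df.Week.to_numpy()           # days 7, 14, ..., 77
    N = 100000
    I0, R0 = 10, 0
    S0, J0 = N - I0 - R0, I0
    gamma = 1/7 * 7                      # question's value, per week

    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = -beta * S * I / N
        dI = beta * S * I / N - gamma * I
        dR = gamma * I
        dJ = beta * S * I / N
        return dS, dI, dR, dJ

    # Integrate from day 0 so the initial condition sits at t = 0,
    # then drop the first row to recover the 11 weekly observations.
    t = np.concatenate(([0], weeks))     # days 0, 7, ..., 77 -> 12 points
    S, I, R, J = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma)).T
    return I[1:] / N                     # shape (11,), matches incidence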

Related

Scipy optimize to find a parameter value

I have an ODE system that outputs a series of trajectories. I wish to use scipy optimize or minimize to find the value of a parameter beta that would best fit, so that the final value in the array I is 7. I am stuck on the logic and syntax of how to implement the minimization. My code is here:
# Total population, N.
N = 1
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 0.001, 0
# Everyone else, U0, is susceptible to infection initially.
U0 = N - I0 - R0
J0 = I0
Lf0, Ls0 = 0, 0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
beta, gamma = 5, 365/75
int_gamma = 0.8
mu, muTB, sigma, rho = 1/80, 1/6, 1/6, 0.03
u, v, w = 0.88, 0.083, 0.0006
t = np.linspace(0, 500, 500 + 1)

# The model differential equations.
def deriv(y, t, N, beta, gamma, mu, muTB, sigma, rho, u, v, w):
    U, Lf, Ls, I, R, cInc = y
    b = (mu * (U + Lf + Ls + R)) + (muTB * I)
    lamda = beta * I
    clamda = 0.2 * lamda
    dU = b - ((lamda + mu) * U)
    dLf = (lamda * U) + ((clamda) * (Ls + R)) - ((u + v + mu) * Lf)
    dLs = (u * Lf) - ((w + clamda + mu) * Ls)
    dI = w * Ls + v * Lf - ((gamma + muTB + sigma) * I) + (rho * R)
    dR = ((gamma + sigma) * I) - ((rho + clamda + mu) * R)
    cI = w * Ls + v * Lf + (rho * R)
    return dU, dLf, dLs, dI, dR, cI

# Integrate the equations over the time grid, t.
solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t, args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
U, Lf, Ls, I, R, cInc = solve.T
Here beta is already defined, but I want a program that uses minimize to find beta so that, in the array I, the final value is 7 (it can also be approximately, or close to, 7).
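A minimal sketch of how this could be wired up (not from the original thread; it assumes the deriv, t, initial conditions and rate constants defined above): treat beta as the free variable and minimize the squared distance between the final value of I and the target of 7.

from scipy.optimize import minimize

def final_I_error(x):
    beta = float(np.atleast_1d(x)[0])   # minimize passes a length-1 array
    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t,
                   args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    I = solve.T[3]                      # infected compartment
    return (I[-1] - 7) ** 2             # squared miss from the target

res = minimize(final_I_error, x0=5, method="Nelder-Mead")
print(res.x[0])  # fitted beta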

plotting beta against maximum I in an SIR model

I'm trying to run an SIR model in Python. I want to plot changing beta values against the maximum of I for each beta value. I have the betas figured out, but I don't know how I would plot I against them.
Here's my code so far:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
import math

def SIR(y, t):
    S, I, R = y
    mu = 0.1
    N = S + I + R
    Sdot = -beta * S * I
    Idot = beta * S * I - mu * I
    Rdot = mu * I
    return Sdot, Idot, Rdot

tf = 100
Nsteps = 1000
t = np.linspace(0, tf, Nsteps + 1)

S0 = 10**4 - 3
I0 = 3
R0 = 0
y0 = np.array([S0, I0, R0])
y_sol = odeint(SIR, y0, t)

S = y_sol[:, 0]
I = y_sol[:, 1]
R = y_sol[:, 2]
Imax = max(I)
tmax = t[I.argmax()]
Smax = S[I.argmax()]

beta_vals = np.linspace(0.2, 0.6, 5)
max_I = []
for beta in beta_vals:
    max_I.append(max(I))

plt.plot(beta_vals, max_I)
plt.xlabel('beta')
plt.ylabel('Maximum value of I')
plt.title('Effect of beta on the maximum value of I')
plt.show()
I thought that using append would give me the corresponding values, but I end up with a straight line.
You are almost there. Try this updated code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
import math

def SIR(y, t, beta):
    S, I, R = y
    mu = 0.1
    N = S + I + R
    Sdot = -beta * S * I
    Idot = beta * S * I - mu * I
    Rdot = mu * I
    return Sdot, Idot, Rdot

tf = 100
Nsteps = 1000
t = np.linspace(0, tf, Nsteps + 1)

S0 = 10**4 - 3
I0 = 3
R0 = 0
y0 = np.array([S0, I0, R0])

beta_vals = np.linspace(0.2, 0.6, 5)
max_I = []
for beta in beta_vals:
    # re-solve the ODE system for each beta and record the peak of I
    y_sol = odeint(SIR, y0, t, args=(beta,))
    S = y_sol[:, 0]
    I = y_sol[:, 1]
    max_I.append(max(I))

plt.plot(beta_vals, max_I)
plt.xlabel('beta')
plt.ylabel('Maximum value of I')
plt.title('Effect of beta on the maximum value of I')
plt.show()
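As a cross-check (an addition, not part of the original answer): for this SIR model without vital dynamics, the quantity I + S - (mu/beta)*ln(S) is conserved along trajectories, and I peaks when S = mu/beta. That gives a closed-form peak to compare the loop's output against:

import numpy as np

# Closed-form SIR peak (assumes S0, I0, mu as in the code above and
# that an epidemic occurs, i.e. beta * S0 > mu).
def I_max_analytic(beta, mu=0.1, S0=10**4 - 3, I0=3):
    if beta * S0 <= mu:
        return I0  # no outbreak: I only decays
    return I0 + S0 - (mu / beta) * (1 + np.log(beta * S0 / mu))

for beta in np.linspace(0.2, 0.6, 5):
    print(beta, I_max_analytic(beta))

If the loop is re-solving correctly, max_I should track these analytic values closely.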

Estimating a parameter in an ODE at a certain time point, given other conditions

Assume I have all but one parameter in my ODE system, and I wish to infer the missing one. Would I simply rearrange the equations to isolate the value? How is that done in a system with several equations? For example:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt

# Three compartments: susceptible S, infected I, recovered R.
# dS/dt, dI/dt, dR/dt:
# susceptible sees birth rate coming in, deaths leaving and force of infection leaving
# infected sees FOI coming in, deaths leaving and recovery rates
# recovered sees recovery rate coming in, deaths leaving
# beta is the transmission coefficient; FOI is beta * (I/N) where N is total pop
# initially consider a model not accounting for births and deaths

# Total population, N.
N = 1000
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 10, 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - I0 - R0
J0 = I0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
#beta =
gamma = 1/7
# A grid of time points (in days)
t = np.linspace(0, 100, 100 + 1)

# The SIR model differential equations.
def deriv(y, t, N, beta, gamma):
    S, I, R, J = y
    dS = ((-beta * S * I) / N)
    dI = ((beta * S * I) / N) - (gamma * I)
    dR = (gamma * I)
    dJ = ((beta * S * I) / N)
    return dS, dI, dR, dJ

# Integrate the SIR equations over the time grid, t.
solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
S, I, R, J = solve.T
As you can see, I have left beta empty (commented out). If I have all the other values, and know that at the peak of the epidemic 10% of the population is infected, can beta be found from that information? What I tried was this:

sol = solve_ivp(lambda beta: deriv, [t], t_eval=t)
print(sol)

However, I have realised this syntax does not work. What is wrong with my approach? How can I estimate a value for beta?
The easiest approach here is to parameterize your code above by beta, plot the result (peak infections, in your case) as a function of beta, and then see where it crosses the threshold. Define the function:
def peak_infections_pct(beta, n_days_total=100):
    # Total population, N.
    N = 1000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    gamma = 1/7
    # A grid of time points (in days)
    t = np.linspace(0, n_days_total, n_days_total + 1)

    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ

    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T
    return np.max(I) / N
Calculate and plot:

betas = np.linspace(0, 1, 101, endpoint=True)
peak_inf = [peak_infections_pct(b) for b in betas]
plt.plot(betas, peak_inf)
plt.plot(betas, 0.1 * np.ones(len(betas)))
to get the plot of peak infections against beta (figure omitted); so the answer is about beta ≈ 0.25.
To be more precise, just solve for beta:
from scipy.optimize import root
root(lambda b: peak_infections_pct(b) - 0.1, x0=0.5).x
output:
array([0.23847079])
Note that I left the time interval as an input to the function; you may want to use a different length, as the epidemic may last longer than 100 days.
Just to double-check, let's plot infections as a function of time for our beta = 0.2384... (figure omitted): indeed the peak is at 100 individuals, which is 10% of N.
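As an aside (an addition, not from the original answer): since peak_infections_pct(b) - 0.1 is a scalar function with a sign change, a bracketing solver such as scipy.optimize.brentq is a robust alternative to root. A minimal sketch, assuming peak_infections_pct from above and that the bracket endpoints are assumptions chosen to straddle the 10% threshold:

from scipy.optimize import brentq

beta_crit = brentq(lambda b: peak_infections_pct(b) - 0.1, 0.2, 0.3)
print(beta_crit)  # should agree with the root() result above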

How to implement a system of stochastic ODEs (SDEs) in python?

I have a system of ODEs in which I am trying to include an 'error' term, so that it becomes a system of stochastic ODEs.
For solving a system of ODEs in Python I normally use SciPy's odeint.
An example derived from the Scipy Cookbook, involving the famous Zombie apocalypse:
# zombie apocalypse modeling
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint

plt.rcParams['figure.figsize'] = 10, 8

P = 0       # birth rate
d = 0.0001  # natural death percent (per day)
B = 0.0095  # transmission percent (per day)
G = 0.0001  # resurrect percent (per day)
A = 0.0001  # destroy percent (per day)

# solve the system dy/dt = f(y, t)
def f(y, t):
    Si = y[0]
    Zi = y[1]
    Ri = y[2]
    # the model equations (see Munz et al. 2009)
    f0 = P - B*Si*Zi - d*Si
    f1 = B*Si*Zi + G*Ri - A*Si*Zi
    f2 = d*Si + A*Si*Zi - G*Ri
    return [f0, f1, f2]

# initial conditions
S0 = 500.                     # initial population
Z0 = 0                        # initial zombie population
R0 = 0                        # initial death population
y0 = [S0, Z0, R0]             # initial condition vector
t = np.linspace(0, 5., 1000)  # time grid

# solve the DEs
soln = odeint(f, y0, t)
S = soln[:, 0]
Z = soln[:, 1]
R = soln[:, 2]

# plot results
plt.figure()
plt.plot(t, S, label='Living')
plt.plot(t, Z, label='Zombies')
plt.xlabel('Days from outbreak')
plt.ylabel('Population')
plt.title('Zombie Apocalypse - No Init. Dead Pop.; No New Births.')
plt.legend(loc=0)
plt.show()
Is it possible to use odeint to solve a system of stochastic ODEs?
For example if I would like to include an error term/random walk in the birth rate (P) of the equations?
My idea was to use an extra equation in the system to define a random walk (a randomly sampled death rate, using random.normalvariate()) and to solve the system like this:

f0 = P - B*Si*Zi - f3*Si
f1 = B*Si*Zi + G*Ri - A*Si*Zi
f2 = f3*Si + A*Si*Zi - G*Ri
f3 = random.normalvariate(mu, sigma)
return [f0, f1, f2]
Is this the right way to solve a system of SDEs? Or do I have to use a different solver for stochastic ODEs?
With help, the system of ODEs was rewritten into a system of SDEs in which the birth rate is a stochastic process.
It was a great suggestion to use the sdeint package.
# Zombie apocalypse SDE model
import matplotlib.pyplot as plt
import numpy as np
import sdeint

P, d, B, G, A = 0.0001, 0.0001, 0.0095, 0.0001, 0.0001

tspan = np.linspace(0, 5., 1000)
y0 = np.array([500., 0., 0., P])

def f(y, t):
    Si = y[0]
    Zi = y[1]
    Ri = y[2]
    f0 = y[3] - B * Si * Zi - d * Si
    f1 = B * Si * Zi + G * Ri - A * Si * Zi
    f2 = d * Si + A * Si * Zi - G * Ri
    f3 = 0
    return np.array([f0, f1, f2, f3])

def GG(y, t):
    return np.diag([0, 0, 0, 100])

result = sdeint.itoint(f, GG, y0, tspan)
plt.plot(result)
plt.show()
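For intuition about what itoint is doing (an addition, not from the original answer), here is a minimal Euler-Maruyama sketch for the same system, assuming f and GG as defined above. Each step adds the drift f*dt plus the diffusion matrix applied to Gaussian increments with standard deviation sqrt(dt):

import numpy as np

def euler_maruyama(f, G, y0, tspan, rng=np.random.default_rng()):
    # dy = f(y, t) dt + G(y, t) dW, with dW ~ Normal(0, sqrt(dt))
    y = np.empty((len(tspan), len(y0)))
    y[0] = y0
    for i in range(len(tspan) - 1):
        dt = tspan[i + 1] - tspan[i]
        dW = rng.normal(0.0, np.sqrt(dt), size=len(y0))
        y[i + 1] = y[i] + f(y[i], tspan[i]) * dt + G(y[i], tspan[i]) @ dW
    return y

result = euler_maruyama(f, GG, y0, tspan)

sdeint's higher-order schemes are preferable in practice; this only shows the structure of a single Ito integration step.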

Solving set of ODEs with Scipy

I am trying to develop an algorithm (using scipy.integrate.odeint()) that predicts the changing concentration of cells, substrate and product (i.e., X, S, P) over time until the system reaches steady state (~100 or 200 hours). The initial concentration of cells in the bioreactor is 0.1 g/L and there is no glucose or product in the reactor initially. I want to test the algorithm for a range of different flow rates, Q, between 0.01 L/h and 0.25 L/h, and analyze the impact of the flow rate on product production (i.e., Q * P in g/h). Eventually, I would like to generate a plot that shows product production rate (y-axis) versus flow rate, Q, on the x-axis. My goal is to estimate the flow rate that results in the maximum (or critical) production rate. This is my code so far:
from scipy.integrate import odeint
import numpy as np

# Constants
u_max = 0.65
K_s = 0.14
K_1 = 0.48
V = 2
X_in = 0
S_in = 4
Y_s = 0.38
Y_p = 0.2

# Variables
# Q - Flow Rate (L/h), value between 0.01 and 0.25 that produces best Q * P
# X - Cell Concentration (g/L)
# S - The glucose concentration (g/L)
# P - Product Concentration (g/L)

# Equations
def func_dX_dt(X, t, S):
    u = (u_max) / (1 + (K_s / S))
    dX_dt = (((Q * S_in) - (Q * S)) / V) + (u * X)
    return dX_dt

def func_dS_dt(S, t, X):
    u = (u_max) / (1 + (K_s / S))
    dS_dt = (((Q * S_in) - (Q * S)) / V) - (u * (X / Y_s))
    return dS_dt

def func_dP_dt(P, t, X, S):
    u = (u_max) / (1 + (K_s / S))
    dP_dt = ((-Q * P) / V) - (u * (X / Y_p))
    return dP_dt

t = np.linspace(0, 200, 200)

# Q placeholder
Q = 0.01

# Attempt to solve the ordinary differential equations
sol_dX_dt = odeint(func_dX_dt, 0.1, t, args=(S,))
sol_dS_dt = odeint(func_dS_dt, 0.1, t, args=(X,))
sol_dP_dt = odeint(func_dP_dt, 0.1, t, args=(X, S))
In the program's current state there does not seem to be a way to generate the steady-state value for P. I attempted to make this modification to get the value of X:
sol_dX_dt = odeint(func_dX_dt, 0.1, t, args=(odeint(func_dS_dt, 0.1, t, args=(X,)),))
It produces the error:
NameError: name 'X' is not defined
At this point I am not sure how to move forward.
(Edit 1: added the original equations as images; not reproduced here.)
You do not have to define a separate function for each part; return a tuple of the derivatives instead, as I show below:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt

Q = 0.01
V = 2
Ys = 0.38
Sin = 4
Yp = 0.2
Xin = 0
umax = 0.65
Ks = 0.14
K1 = 0.48

def mu(S, umax, Ks, K1):
    return umax / ((1 + Ks/S) * (1 + S/K1))

def dxdt(x, t, *args):
    X, S, P = x
    Q, V, Xin, Ys, Sin, Yp, umax, Ks, K1 = args
    m = mu(S, umax, Ks, K1)
    dXdt = (Q*Xin - Q*X)/V + m*X
    dSdt = (Q*Sin - Q*S)/V - m*X/Ys
    dPdt = -Q*P/V - m*X/Yp
    return dXdt, dSdt, dPdt

t = np.linspace(0, 200, 200)
X0 = 0.1
S0 = 0.1
P0 = 0.1
x0 = X0, S0, P0

sol = odeint(dxdt, x0, t, args=(Q, V, Xin, Ys, Sin, Yp, umax, Ks, K1))

plt.plot(t, sol[:, 0], 'r', label='X(t)')
plt.plot(t, sol[:, 1], 'g', label='S(t)')
plt.plot(t, sol[:, 2], 'b', label='P(t)')
plt.legend(loc='best')
plt.xlabel('t')
plt.grid()
plt.show()
Output: a plot of X(t), S(t) and P(t) against t (figure omitted).
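To get at the original goal of the question (an addition, not covered in the answer above), one can sweep Q over the requested range and plot the production rate Q * P at the end of the run. A minimal sketch, assuming dxdt, x0, t and the constants above, and keeping the signs in dPdt exactly as written in the answer:

# Sweep the flow rate Q and record the production rate Q * P at the
# final (roughly steady-state) time.
Qs = np.linspace(0.01, 0.25, 25)
production = []
for Q_ in Qs:
    sol = odeint(dxdt, x0, t, args=(Q_, V, Xin, Ys, Sin, Yp, umax, Ks, K1))
    P_ss = sol[-1, 2]             # product concentration at t = 200 h
    production.append(Q_ * P_ss)  # product production rate, g/h

plt.plot(Qs, production)
plt.xlabel('Q (L/h)')
plt.ylabel('Q * P (g/h)')
plt.show()
print('flow rate with maximum production:', Qs[np.argmax(production)])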
