So i've made a simple program for numerically aproximating double integral, which accepts that the bounds of the inner integral are funcions:
def double_integral(func, limits, res=1000):
t = time.clock()
t1 = time.clock()
t2 = time.clock()
s = 0
a, b = limits[0], limits[1]
outer_values = np.linspace(a, b, res)
c_is_func = callable(limits[2])
d_is_func = callable(limits[3])
for y in outer_values:
if c_is_func:
c = limits[2](y)
else:
c = limits[2]
if d_is_func:
d = limits[3](y)
else:
d = limits[3]
dA = ((b - a) / res) * ((d - c) / res)
inner_values = np.linspace(c, d, res)
for x in inner_values:
t2 = time.clock() - t2
s += func(x, y) * dA
t1 = time.clock() - t1
t = time.clock() - t
return s, t, t1 / res, t2 / res**2
This is, however, terribly slow. When res=1000, such that the integral is a sum of a million parts, it takes about 5 seconds to run, but the answer is only correct to about the 3rd decimal in my experience. Is there any way to speed this up?
The code i am running to check the integral is
def f(x, y):
if (4 - y**2 - x**2) < 0:
return 0 #This is to avoid taking the root of negarive #'s
return np.sqrt(4 - y**2 - x**2)
def c(y):
return np.sqrt(2 * y - y**2)
def d(y):
return np.sqrt(4 - y**2)
# b d
# S S f(x,y) dx dy
# a c
a, b, = 0, 2
print(double_integral(f, [a, b, c, d]))
The integral is eaqual to 16/9
Edit
so i got a great answer over at coderewiev, but i am still baffeled by how scipy.integrate.dblquad seem to give me the wrong answer (see comment). does anyone have an answer for this?
Related
I have a few lines of code which doesn't converge. If anyone has an idea why, I would greatly appreciate. The original equation is written in def f(x,y,b,m) and I need to find parameters b,m.
np.random.seed(42)
x = np.random.normal(0, 5, 100)
y = 50 + 2 * x + np.random.normal(0, 2, len(x))
def f(x, y, b, m):
return (1/len(x))*np.sum((y - (b + m*x))**2) # it is supposed to be a sum operator
def dfb(x, y, b, m): # partial derivative with respect to b
return b - m*np.mean(x)+np.mean(y)
def dfm(x, y, b, m): # partial derivative with respect to m
return np.sum(x*y - b*x - m*x**2)
b0 = np.mean(y)
m0 = 0
alpha = 0.0001
beta = 0.0001
epsilon = 0.01
while True:
b = b0 - alpha * dfb(x, y, b0, m0)
m = m0 - alpha * dfm(x, y, b0, m0)
if np.sum(np.abs(m-m0)) <= epsilon and np.sum(np.abs(b-b0)) <= epsilon:
break
else:
m0 = m
b0 = b
print(m, f(x, y, b, m))
Both derivatives got some signs mixed up:
def dfb(x, y, b, m): # partial derivative with respect to b
# return b - m*np.mean(x)+np.mean(y)
# ^-------------^------ these are incorrect
return b + m*np.mean(x) - np.mean(y)
def dfm(x, y, b, m): # partial derivative with respect to m
# v------ this should be negative
return -np.sum(x*y - b*x - m*x**2)
In fact, these derivatives are still missing some constants:
dfb should be multiplied by 2
dfm should be multiplied by 2/len(x)
I imagine that's not too bad because the gradient is scaled by alpha anyway, but it could make the speed of convergence worse.
If you do use the correct derivatives, your code will converge after one iteration:
def dfb(x, y, b, m): # partial derivative with respect to b
return 2 * (b + m * np.mean(x) - np.mean(y))
def dfm(x, y, b, m): # partial derivative with respect to m
# Used `mean` here since (2/len(x)) * np.sum(...)
# is the same as 2 * np.mean(...)
return -2 * np.mean(x * y - b * x - m * x**2)
I am trying to code a simple equilibrium reaction where
[A]+[B] <-> [C] + [D]
[A]=50
[B]=50
[C]=50
[D]=50
k1 (forward)= 1
k-1 (reverse)= .5
Thank you
Your question is not very clear,
but i guess you would like to have a program that calculates the equilibrium concentrations of the reaction.
I also suppose that the input data you gave, [A], ..., [D], correspond to the initial concentrations.
Assuming that, the first thing I would advise you - before writing any line of code,
is to establish the formula that gives the equilibrium concentrations; and then implement it.
So, let's do first a bit of chemistry and maths :)
(I've based my reasoning on this course: http://cinet.chim.pagesperso-orange.fr/cours/chap3.html#L18704)
First, the extent at equilibrium is:
xe = [A]0-[A]∞ = [B]0-[B]∞ = [C]∞-[C]0 = [D]∞-[D]0 (1)
Also at equilibrium, you have the Law of Mass Action:
K(constant) = k1/k-1 = [C]∞⋅[D]∞/[A]∞⋅[B]∞ (2)
You can express the constant K as a function of [A]∞,
replacing [B]∞, [C]∞ and [D]∞ in (2) using equations in (1):
[B]∞ = [B]0 + [A]∞ - [A]0[C]∞ = [C]0 + [A]0 - [A]∞[D]∞ = [D]0 + [A]0 - [A]∞K = ([C]0+[A]0-[A]∞)⋅([D]0+[A]0-[A]∞)/[A]∞⋅([B]0+[A]∞-[A]0) (3)
Where (3) can be seen as a second degree equation:
x := [A]∞
(K-1)⋅x2 + [2⋅[A]0 + [C]0 + [D]0 + K⋅([B]0-[A]0)]⋅x - [[A]0⋅([A]0+[C]0+[D]0) + [C]0⋅[D]0] = 0
Similarly or by symmetry, you could have done the same for [B]∞, [C]∞ or [D]∞.
But it is easier to reuse (1).
Now, we can see better how to proceed. Let's code it!
import math
# Solve the quadratic equation a⋅x² + b⋅x + c = 0
def solve_2nd_degree_equation(a, b, c):
discriminant = (b ** 2) - (4 * a * c)
solution_1 = (-b - math.sqrt(discriminant)) / (2 * a)
solution_2 = (-b + math.sqrt(discriminant)) / (2 * a)
return [solution_1, solution_2]
The function above will be used for solving second degree equations.
class EquilibriumReactionProblem(object):
def __init__(self, a0, b0, c0, d0, k_forward, k_reverse):
self.a0 = a0
self.b0 = b0
self.c0 = c0
self.d0 = d0
self.k = k_forward / k_reverse
def _compute_a_equilibrium(self):
a = self.k - 1
b = 2 * self.a0 + self.c0 + self.d0 + self.k * (self.b0 - self.a0)
c = -(self.a0 * (self.a0 + self.c0 + self.d0) + self.c0 * self.d0)
[res] = [s for s in solve_2nd_degree_equation(a, b, c) if 0 <= s]
return round(res, 2)
def solve(self):
a_equilibrium = self._compute_a_equilibrium()
b_equilibrium = self.b0 + a_equilibrium - self.a0
c_equilibrium = self.c0 + self.a0 - a_equilibrium
d_equilibrium = self.d0 + self.a0 - a_equilibrium
print({
'a_equilibrium': a_equilibrium,
'b_equilibrium': b_equilibrium,
'c_equilibrium': c_equilibrium,
'd_equilibrium': d_equilibrium
})
All we have to do is now to run it with the initial conditions:
EquilibriumReactionProblem(
a0=50,
b0=50,
c0=50,
d0=50,
k_forward=1,
k_reverse=0.5
).solve()
{'a_equilibrium': 41.42, 'b_equilibrium': 41.42, 'c_equilibrium': 58.58, 'd_equilibrium': 58.58}
EDIT: You may also take advantage of ChemPy lib.
I'm trying to optimize simple integration in python which looks something like
from scipy import integrate
import numpy as np
from scipy.special import kv
import time
#Example function
def integrand(x, a, b, c):
return a * (x ** (-b)) * (np.sqrt(x ** (c) + 1) - 1)
#Real Function that I want to calculate
def Bes(xx):
return integrate.quad(lambda x: kv(5./3.,x), xx,np.inf)
def F(x,a,b,c,d,e,f):
zx = 1/((x**2.+1)*a)
feq = e*x**(f)
if (x>c):
feq *= c/x * np.exp(-(x/d)**2.)
return b*Bes(zx)*feq*x**2.
start = time.time()
array_length = 10
a = np.random.rand(array_length)+3.
b = np.random.rand(array_length)+1.
c = np.random.rand(array_length)
d = (np.random.rand(array_length)+1)*100.
e = np.random.rand(array_length)*100.
f = np.random.rand(array_length)
inte = np.array([])
for i in range(array_length):
result = integrate.quad(lambda x: F(x, a[i], b[i], c[i],d[i],e[i],f[i]),0.01,100000.)
inte = np.append(inte,result[0])
print("For array length = %i" % array_length)
print("Time = %.2f [sec]" %(time.time()-start))
But the problems that I'm facing are
a, b, c are array with length > 10^7 (same length)
integration range of x starts at 0.01 and extends to infinite
Integration at the small x (like [0.01, 1]) is very important and needs small step.
I want to integrate this function on each coefficient value and returns the entire array of integration as the result (length ~ 10^7), efficiently.
What kind of tools should I use?
(+) I just changed my code from simple example to actual integration form that I need to solve. Sorry for making confusion.
I suspected that this integral would converge for certain values of b and c, so I tried to evaluate this using Sympy:
import sympy
sympy.init_printing()
a, b, c = sympy.symbols('a, b, c', positive=True)
x = sympy.Symbol('x', positive=True)
sympy.integrate(a*(x**(-b))*(sympy.sqrt(x**c+1)-1), (x, 0, sympy.oo))
This means that you should be able to obtain the correct results with this code as long as your coefficients pass the check function.
from numpy import sqrt, pi
from scipy.special import gamma
def check(a, b, c):
assert (-(-b + 1)/c < 1)
assert (1/2 - (-b + 1)/c > 1)
assert (1 - (-b + 1)/c > 1)
def result(a, b, c):
return a*gamma(-b/c + 1 + 1/c)*gamma(b/c - 1/2 - 1/c)/(2*sqrt(pi)*(b - 1))
I have a CSV that provides a y value for three different x values for each row. When read into a pandas DataFrame, it looks like this:
5 10 20
0 -13.6 -10.7 -10.3
1 -14.1 -11.2 -10.8
2 -12.3 -9.4 -9.0
That is, for row 0, at 5 the value is -13.6, at 10 the value is -10.7, and at 20 the value is -10.3. These values are the result of an algorithm in the form:
def calc(x, r, b, c, d):
if x < 10:
y = (x * r + b) / x
elif x >= 10 and x < 20:
y = ((x * r) + (b - c)) / x
else:
y = ((x * r) + (b - d)) / x
return y
I want to find the value of r, b, c, and d for each row. I know certain things about each of the values. For example, for each row: r is in np.arange(-.05, -.11, -.01), b is in np.arange(0, -20.05, -.05), and c and d are in np.arange(0, 85, 5). I also know that d is <= c.
Currently, I am solving this with brute force. For each row, I iterate through every combination of r, b, c, and d and test if the value at the three x values is equal to the known value from the DataFrame. This works, giving me a few combinations for each row that are basically the same except for rounding differences.
The problem is that this approach takes a long time when I need to run it against 2,000+ rows. My question is: is there a faster way than iterating and testing every combination? My understanding is that this is a constraint satisfaction problem but, after that, I have no idea what to narrow in on; there are so many types of constraint satisfaction problems (it seems) that I'm still lost (I'm not even certain that this is such a problem!). Any help in pointing me in the right direction would be greatly appreciated.
I hope i understood the task correctly.
If you know the resolution/discretization of the parameters, it looks like a discrete-optimization problem (in general: hard), which could be solved by CP-approaches.
But if you allow these values to be continuous (and reformulate the formulas), it is:
(1) a Linear Program: if checking for feasible values (there needs to be a valid solution)
(2) a Linear Program: if optimizing parameters for minimization of sum of absolute differences (=errors)
(3) a Quadratic Program: if optimizing parameters for minimization of sum of squared differences (=errors) / equivalent to minimizing euclidean-norm
All three versions can be solved efficiently!
Here is a non-general (could be easily generalized) implementation of (3) using cvxpy to formulate the problem and ecos to solve the QP. Both tools are open-source.
Code
import numpy as np
import time
from cvxpy import *
from random import uniform
""" GENERATE TEST DATA """
def sample_params():
while True:
r = uniform(-0.11, -0.05)
b = uniform(-20.05, 0)
c = uniform(0, 85)
d = uniform(0, 85)
if d <= c:
return r, b, c, d
def calc(x, r, b, c, d):
if x < 10:
y = (x * r + b) / x
elif x >= 10 and x < 20:
y = ((x * r) + (b - c)) / x
else:
y = ((x * r) + (b - d)) / x
return y
N = 2000
sampled_params = [sample_params() for i in range(N)]
data_5 = np.array([calc(5, *sampled_params[i]) for i in range(N)])
data_10 = np.array([calc(10, *sampled_params[i]) for i in range(N)])
data_20 = np.array([calc(20, *sampled_params[i]) for i in range(N)])
data = np.empty((N, 3))
for i in range(N):
data[i, :] = [data_5[i], data_10[i], data_20[i]]
""" SOLVER """
def solve(row):
""" vars """
R = Variable(1)
B = Variable(1)
C = Variable(1)
D = Variable(1)
E = Variable(3)
""" constraints """
constraints = []
# bounds
constraints.append(R >= -.11)
constraints.append(R <= -.05)
constraints.append(B >= -20.05)
constraints.append(B <= 0.0)
constraints.append(C >= 0.0)
constraints.append(C <= 85.0)
constraints.append(D >= 0.0)
constraints.append(D <= 85.0)
constraints.append(D <= C)
# formula of model
constraints.append((1.0 / 5.0) * B + R == row[0] + E[0]) # alternate function form: b/x+r
constraints.append((1.0 / 10.0) * B - (1.0 / 10.0) * C == row[1] + E[1]) # alternate function form: b/x-c/x+r
constraints.append((1.0 / 20.0) * B - (1.0 / 20.0) * D == row[2] + E[2]) # alternate function form: b/x-d/x+r
""" Objective """
objective = Minimize(norm(E, 2))
""" Solve """
problem = Problem(objective, constraints)
problem.solve(solver=ECOS, verbose=False)
return R.value, B.value, C.value, D.value, E.value
start = time.time()
for i in range(N):
r, b, c, d, e = solve(data[i])
end = time.time()
print('seconds taken: ', end-start)
print('seconds per row: ', (end-start) / N)
Output
('seconds taken: ', 20.620506048202515)
('seconds per row: ', 0.010310253024101258)
I'm trying to fit a logistic growth curve to my data using curve_fit using the following function as the input.
def logistic(x, y0, k, d, a, b):
if b > 0 and a > 0:
y = (k * pow(1 + np.exp(d - (a * b * x) ), (-1/b) )) + y0
elif b >= -1 or b < 0 or a < 0:
y = (k * pow(1 - np.exp(d - (a * b * x) ), (-1/b) )) + y0
return y
As you can see the function i am using has some restrictions on the values it can accept for parameter a and b. Any guess on how to handle the incorrect values? Should the input function raise an exception or return a dummy value?
Thanks in advance.
When the parameters fall out of the admissible range, return a wildly huge number (far from the data to be fitted). This will (hopefully) penalize this choice of parameters so much that curve_fit will settle on some other admissible set of parameters as optimal:
def logistic(x, y0, k, d, a, b):
if b > 0 and a > 0:
y = (k * pow(1 + np.exp(d - (a * b * x) ), (-1/b) )) + y0
elif b >= -1 or b < 0 or a < 0:
y = (k * pow(1 - np.exp(d - (a * b * x) ), (-1/b) )) + y0
else:
y = 1e10
return y