How to Evaluate Function at Integration Times Using Scipy's solve_ivp - python

I'm using scipy.integrate's solve_ivp method to solve an IVP, and I want to be able to evaluate a function at the time steps that I give for the integration, but I don't know how to do it.
I could loop back over every element of the solution afterwards, but that would take a significant amount of time on top of what the solve already takes, so I would much rather compute these values at the same time steps the solver evaluates during the integration.
import scipy.integrate
import numpy
class Foo:
    def __init__(self):
        self.foo_vector_1 = numpy.zeros(3)
        self.foo_vector_2 = numpy.zeros(3)
        self.foo_vector_3 = numpy.zeros(3)

foo = Foo()

d_vector_1 = lambda foo: ...  # gets the derivative of foo_vector_1
d_vector_2 = lambda foo: ...  # gets the derivative of foo_vector_2

def get_foo_vector_3_value(foo):
    return ...  # returns the ACTUAL VALUE of foo_vector_3, NOT its derivative

def dy(t, y):
    foo.foo_vector_1 = numpy.array((y[0], y[1], y[2]))
    foo.foo_vector_2 = numpy.array((y[3], y[4], y[5]))
    return numpy.array((d_vector_1(foo), d_vector_2(foo))).flatten().tolist()

foo.foo_vector_1 = numpy.array((1, 2, 3))
foo.foo_vector_2 = numpy.array((4, 5, 6))
y0 = numpy.array((foo.foo_vector_1, foo.foo_vector_2)).flatten().tolist()
sol = scipy.integrate.solve_ivp(dy, (0, 10), y0, t_eval=numpy.arange(0, 10, 0.01))  # t_eval must lie within the (0, 10) span
foo_vectors_1 = numpy.column_stack((sol.y[0], sol.y[1], sol.y[2]))
foo_vectors_2 = numpy.column_stack((sol.y[3], sol.y[4], sol.y[5]))
foo_vectors_3 = ????????
Ideally, I would be able to get the values of foo_vectors_3 without resetting foo in a loop over the whole list of foo vectors, because for me that would take a significant amount of computation time.

I think the friction here comes from not using a 1D numpy ndarray as the base object for the computation. You can mentally apportion the 1D array into your two separate foo attributes; then the computation of foo_vectors_3 is trivial compared to the ODE integration. You could also add helper functions to map from the 1D ndarray that solve_ivp uses to your foo vectors and back.
import scipy.integrate
import numpy as np

def d_vec1(t, y):
    # put your function here instead of just returning 1
    return 1 * np.ones_like(y)

def d_vec2(t, y):
    # put your function here instead of just returning 2
    return 2 * np.ones_like(y)

def eval_foo3(t, y):
    return y[0:3, :] + y[3:, :]  # use your own function instead

def dy(t, y):
    return np.array((d_vec1(t, y[0:3]), d_vec2(t, y[3:]))).flatten()

v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])
y0 = np.array((v1, v2)).flatten()
t_eval = np.linspace(0, 10, 11)
sol = scipy.integrate.solve_ivp(dy, (0, 10), y0, t_eval=t_eval)

foo3 = eval_foo3(sol.t, sol.y)
print(sol.y[0:3])
print(sol.y[3:])
print(foo3)
[[ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11.]
[ 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12.]
[ 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13.]]
[[ 4. 6. 8. 10. 12. 14. 16. 18. 20. 22. 24.]
[ 5. 7. 9. 11. 13. 15. 17. 19. 21. 23. 25.]
[ 6. 8. 10. 12. 14. 16. 18. 20. 22. 24. 26.]]
[[ 5. 8. 11. 14. 17. 20. 23. 26. 29. 32. 35.]
[ 7. 10. 13. 16. 19. 22. 25. 28. 31. 34. 37.]
[ 9. 12. 15. 18. 21. 24. 27. 30. 33. 36. 39.]]
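If you also need foo3 at times other than the requested steps, solve_ivp can return a dense interpolant via dense_output=True; a minimal sketch on top of the code above:
# Sketch: sol.sol is a callable interpolant returned when dense_output=True,
# so eval_foo3 can be applied at arbitrary times after the solve finishes.
sol = scipy.integrate.solve_ivp(dy, (0, 10), y0, dense_output=True)
t_fine = np.linspace(0, 10, 101)
y_fine = sol.sol(t_fine)              # state matrix of shape (6, 101)
foo3_fine = eval_foo3(t_fine, y_fine)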

Related

Different results on using a function and its content

I was trying to understand how the function fast_knn of the impyute library works, so I tried to execute it line by line. Here it is:
import numpy as np
from scipy.spatial import KDTree

def shepards(distances, power=2):
    return to_percentage(1 / np.power(distances, power))

def to_percentage(vec):
    return vec / np.sum(vec)

data_temp = np.arange(25).reshape((5, 5)).astype(float)  # np.float is deprecated
data_temp[0][2] = np.nan
k = 4
eps = 0
p = 2
distance_upper_bound = np.inf
leafsize = 10
idw_fn = shepards
init_impute_fn = mean  # impyute's column-mean imputation helper
nan_xy = np.argwhere(np.isnan(data_temp))
data_temp_c = init_impute_fn(data_temp)
kdtree = KDTree(data_temp_c, leafsize=leafsize)
for x_i, y_i in nan_xy:
    distances, indices = kdtree.query(data_temp_c[x_i], k=k+1, eps=eps,
                                      p=p, distance_upper_bound=distance_upper_bound)
    # Will always return itself in the first index. Delete it.
    distances, indices = distances[1:], indices[1:]
    # Add small constant to distances to avoid division by 0
    distances += 1e-3
    weights = idw_fn(distances)
    # Assign missing value the weighted average of `k` nearest neighbours
    data_temp[x_i][y_i] = np.dot(weights, [data_temp_c[ind][y_i] for ind in indices])
data_temp
This outputs:
array([[ 0. , 1. , 10.06569379, 3. , 4. ],
[ 5. , 6. , 7. , 8. , 9. ],
[10. , 11. , 12. , 13. , 14. ],
[15. , 16. , 17. , 18. , 19. ],
[20. , 21. , 22. , 23. , 24. ]])
whereas the function itself has a different output. The code:
from impyute import fast_knn
import numpy as np

data_temp = np.arange(25).reshape((5, 5)).astype(float)
data_temp[0][2] = np.nan
fast_knn(data_temp, k=4)
and the output
array([[ 0. , 1. , 16.78451885, 3. , 4. ],
[ 5. , 6. , 7. , 8. , 9. ],
[10. , 11. , 12. , 13. , 14. ],
[15. , 16. , 17. , 18. , 19. ],
[20. , 21. , 22. , 23. , 24. ]])
There seem to be discrepancies between the GitHub repository code and the installed library's source code (the repository has not been updated). The following is the library source code:
def fast_knn(data, k=3, eps=0, p=2, distance_upper_bound=np.inf, leafsize=10, **kwargs):
    null_xy = find_null(data)
    data_c = mean(data)
    kdtree = KDTree(data_c, leafsize=leafsize)
    for x_i, y_i in null_xy:
        distances, indices = kdtree.query(data_c[x_i], k=k+1, eps=eps,
                                          p=p, distance_upper_bound=distance_upper_bound)
        # Will always return itself in the first index. Delete it.
        distances, indices = distances[1:], indices[1:]
        weights = distances/np.sum(distances)
        # Assign missing value the weighted average of `k` nearest neighbours
        data[x_i][y_i] = np.dot(weights, [data_c[ind][y_i] for ind in indices])
    return data
The weights are computed in a different manner (not using the shepards function). Hence, the difference in outputs.
You probably ran the code from the current master branch of impyute, but the installed impyute package is likely v0.0.8 (the most recent release), whose code lives on the release/0.0.8 branch.
The difference in the definition of fast_knn is below.
On the current master branch:
# Will always return itself in the first index. Delete it.
distances, indices = distances[1:], indices[1:]
# Add small constant to distances to avoid division by 0
distances += 1e-3
weights = idw_fn(distances)
On release/0.0.8 branch:
# Will always return itself in the first index. Delete it.
distances, indices = distances[1:], indices[1:]
weights = distances/np.sum(distances)
If you use the code from the release/0.0.8 branch, you will get the same result as the installed impyute package gives.
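To see which variant you are actually running, you can inspect the installed function directly (a quick check; it assumes the package exposes __version__):
import inspect
import impyute

print(impyute.__version__)                   # 0.0.8 if the release branch applies
print(inspect.getsource(impyute.fast_knn))   # shows the weighting code actually installed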

Python: Apply function to every entry in numpy 3d array

I would like to apply a (more complex?) function to my 3D numpy array with shape (x, y, z) = (4, 4, 3).
Let's assume I have the following array:
array = np.arange(48)
array = array.reshape([4,4,3])
Now I would like to call the following function on each point of the array:
p(x,y,z) = a(z) + b(z)*ps(x,y)
Let's assume a and b are the following 1D arrays, and ps is a 2D array.
a = np.random.randint(1,10, size=3)
b = np.random.randint(1,10, size=3)
ps = np.arange(16)
ps = ps.reshape([4,4])
My intuitive approach was to loop over my array and call the function on each point. It works, but of course it's way too slow:
def calcP(a, b, ps, x, y, z):
    p = a[z] + b[z]*ps[x, y]
    return p

def stupidLoop(array, a, b, ps):
    dummy = array  # note: this aliases `array`, it does not copy it
    for z in range(0, 3):
        for x in range(0, 4):
            for y in range(0, 4):
                dummy[x, y, z] = calcP(a, b, ps, x, y, z)
    return dummy

updatedArray = stupidLoop(array, a, b, ps)
Is there a faster way? I know it works with vectorized functions, but I cannot figure it out with mine.
I didn't actually try it with these numbers. It's just to exemplify my problem. It comes from the Meteorology world and is a little more complex.
Vectorize the loop, and use broadcasting:
a.reshape([1,1,-1]) + b.reshape([1,1,-1]) * ps.reshape([4,4,1])
EDIT:
Thanks @NilsWerner for offering a more common way in a comment:
a + b * ps[:, :, None]
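As a quick sanity check (using the question's a, b, and ps), the broadcast expression reproduces the triple loop exactly:
# Compare the broadcast result against the explicit loop
expected = np.empty((4, 4, 3))
for z in range(3):
    for x in range(4):
        for y in range(4):
            expected[x, y, z] = a[z] + b[z] * ps[x, y]
assert np.allclose(a + b * ps[:, :, None], expected)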
You can do this using numpy.fromfunction():
import numpy as np
a = np.random.randint(1,10, size=3)
b = np.random.randint(1,10, size=3)
ps = np.arange(16)
ps = ps.reshape([4,4])
def calcP(x, y, z, a=a, b=b, ps=ps):
    p = a[z] + b[z]*ps[x, y] + 0.0
    return p
array = np.arange(48)
array = array.reshape([4,4,3])
updatedArray = np.fromfunction(calcP, (4,4,3), a=a,b=b,ps=ps, dtype=int)
print (updatedArray)
Notice that I've modified your function calcP slightly to take keyword arguments. I've also added 0.0 to ensure that the output array contains floats rather than ints.
Also, notice that the second argument to fromfunction() merely specifies the shape of the grid, over which the function calcP() is to be invoked.
Output (will vary each time due to randint):
[[[ 8. 5. 3.]
[ 9. 6. 12.]
[ 10. 7. 21.]
[ 11. 8. 30.]]
[[ 12. 9. 39.]
[ 13. 10. 48.]
[ 14. 11. 57.]
[ 15. 12. 66.]]
[[ 16. 13. 75.]
[ 17. 14. 84.]
[ 18. 15. 93.]
[ 19. 16. 102.]]
[[ 20. 17. 111.]
[ 21. 18. 120.]
[ 22. 19. 129.]
[ 23. 20. 138.]]]
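Note that fromfunction() does not loop in Python either: it builds integer index grids of the requested shape and calls calcP once with whole arrays, so its result should agree with the broadcasting answer above; a quick check under the same a, b, ps:
result = np.fromfunction(calcP, (4, 4, 3), a=a, b=b, ps=ps, dtype=int)
assert np.allclose(result, a + b * ps[:, :, None])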

How to upsample a vector to a certain length in python

Given a vector, [1,2,3,4,5] for example, how do you upsample the vector with linear interpolation to a certain length, such as 45, in Python?
If it is linear, there should be a constant increase or decrease between consecutive elements. In your case it is one. So take the difference between two elements, then add that to the last element however many times you want to.
a = [1, 2, 3, 4, 5]
num_add = 45 - len(a)
b = a[1] - a[0]
for z in range(num_add):   # range(1, num_add) would fall one element short
    a.append(b + a[-1])
This extends the list to exactly 45 elements.
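For true linear interpolation to an arbitrary target length (rather than extrapolating a constant step), numpy's interp does it in one call; a minimal sketch:
import numpy as np

a = [1, 2, 3, 4, 5]
n = 45
# Map n evenly spaced sample positions onto the original index range [0, len(a)-1]
upsampled = np.interp(np.linspace(0, len(a) - 1, n), np.arange(len(a)), a)
print(len(upsampled))   # 45, with the endpoints 1.0 and 5.0 preserved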
Well, I interpreted your list of [1, 2, 3, 4, 5] as simply an example. If you want a script that will actually interpolate the series you give it, try this:
from scipy.optimize import curve_fit
import numpy as np
# Line equation - doesn't have to be linear
def lin_eq(x, m, b):
    return x*m + b
# Your actual data
std_y = np.array([1, 2, 3, 4, 5])
# Index of data
std_x = np.arange(1, len(std_y) + 1)
popt, pcov = curve_fit(lin_eq, std_x, std_y)
top = 45
# Index of projected data
proj_x = np.arange(1, top + 1)
# Interpolated data
proj_y = lin_eq(proj_x, *popt)
print(proj_y)
[ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15.
16. 17. 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30.
31. 32. 33. 34. 35. 36. 37. 38. 39. 40. 41. 42. 43. 44. 45.]

Apply polyfit on a rolling basis

I found this useful article on polyfit, which works pretty well:
http://www.emilkhatib.com/analyzing-trends-in-data-with-pandas/
import numpy as np
coefficients, residuals, _, _, _ = np.polyfit(range(len(selected.index)),selected,1,full=True)
mse = residuals[0]/(len(selected.index))
nrmse = np.sqrt(mse)/(selected.max() - selected.min())
print('Slope ' + str(coefficients[0]))
print('NRMSE: ' + str(nrmse))
Now I would like to use this on a rolling basis..
def test(input_list, i):
    if sum(~np.isnan(x) for x in input_list) < 2:
        return np.NaN
    print(input_list)
    coefficients, residuals, _, _, _ = np.polyfit(range(len(input_list)), input_list, 1, full=True)
    mse = residuals[0]/(len(input_list))
    nrmse = np.sqrt(mse)/(input_list.max() - input_list.min())
    print('Slope ' + str(coefficients[0]))
    print('NRMSE: ' + str(nrmse))
    a = coefficients[0]*i + coefficients[1]
    return a

df['pred'] = df['abs'].rolling(window=2, min_periods=1, center=False).apply(lambda x: test(x, base1.index))
but I can't get it working :)
I get
IndexError: index 0 is out of bounds for axis 0 with size 0
instead of correct results :)
Anybody got an idea? Thanks! e.
****EDIT1****
Sorry, I missed posting a concrete example...
I managed to get the function working by transforming the numpy array into a DataFrame,
but somehow residuals is empty:
import quandl
import MySQLdb
import pandas as pd
import numpy as np
import sys
import matplotlib.pyplot as plt
def test(input_list, i):
    if sum(~np.isnan(x) for x in input_list) < 2:
        return np.NaN
    abc = pd.DataFrame(input_list)
    coefficients, residuals, _, _, _ = np.polyfit(range(len(abc)), abc[0], 1, full=True)
    # residuals is empty... why?
    a = coefficients[0]*len(abc) + coefficients[1]
    return a

df = quandl.get("WIKI/GOOGL")
df = df.loc[:, ['High', 'Low', 'Close']]  # .ix is deprecated
# reset index for calc
#base1['DateTime'] = base1.index
#base1.index = range(len(base1))
df['close_pred'] = df['Close'].rolling(window=15, min_periods=2, center=False).apply(lambda x: test(x, 0))
print(df.head(30).to_string())
Residuals are empty only for the first iteration; see the slightly modified code and output below:
def test(data):
    if sum(~np.isnan(x) for x in data) < 2:
        return np.NaN
    df = pd.DataFrame(data)
    coefficients, residuals, _, _, _ = np.polyfit(range(len(data)), df[0], 1, full=True)
    #if residuals.size == 0:
    #    residuals = [0]
    print(coefficients[-2], residuals, data)
    return coefficients[-2]
and the output:
df_xx['pred'] = df_xx[0].rolling(window=5, min_periods=2, center=False).apply(lambda y: test(y))
0.9999999999999998 [] [0. 1.]
1.0 [4.29279946e-34] [0. 1. 2.]
1.0000000000000002 [3.62112419e-33] [0. 1. 2. 3.]
0.9999999999999999 [8.77574736e-31] [0. 1. 2. 3. 4.]
0.9999999999999999 [1.25461096e-30] [1. 2. 3. 4. 5.]
0.9999999999999999 [2.93468782e-30] [2. 3. 4. 5. 6.]
0.9999999999999997 [1.38665176e-30] [3. 4. 5. 6. 7.]
0.9999999999999997 [2.18347839e-30] [4. 5. 6. 7. 8.]
0.9999999999999999 [6.21693422e-30] [5. 6. 7. 8. 9.]
1.0 [1.07025673e-29] [ 6. 7. 8. 9. 10.]
1.0000000000000002 [1.4374879e-29] [ 7. 8. 9. 10. 11.]
0.9999999999999997 [1.14542951e-29] [ 8. 9. 10. 11. 12.]
1.0000000000000004 [9.73226454e-30] [ 9. 10. 11. 12. 13.]
0.9999999999999997 [1.99069506e-29] [10. 11. 12. 13. 14.]
0.9999999999999997 [1.09437894e-29] [11. 12. 13. 14. 15.]
1.0 [3.60983058e-29] [12. 13. 14. 15. 16.]
1.0000000000000002 [1.90967258e-29] [13. 14. 15. 16. 17.]
1.0000000000000002 [3.13030715e-29] [14. 15. 16. 17. 18.]
1.0 [1.25806434e-29] [15. 16. 17. 18. 19.]
The simple guard below fixes it:
if residuals.size == 0:
    residuals = [0]
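Putting the guard together with the rolling helper gives a consolidated version of test (a sketch; raw=True, which hands the helper a plain ndarray, and the window/column names are assumptions carried over from the question):
import numpy as np
import pandas as pd

def test(data):
    # data arrives as a plain ndarray because of raw=True below
    if np.sum(~np.isnan(data)) < 2:
        return np.nan
    coefficients, residuals, _, _, _ = np.polyfit(
        np.arange(len(data)), data, 1, full=True)
    if residuals.size == 0:   # exact fit (e.g. a 2-point window) yields no residuals
        residuals = np.array([0.0])
    mse = residuals[0] / len(data)   # residuals[0] is now always safe to access
    # extrapolate one step past the window, as in the original post
    return coefficients[0] * len(data) + coefficients[1]

df['pred'] = df['abs'].rolling(window=5, min_periods=2).apply(test, raw=True)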

Conditional numpy cumulative sum

I'm looking for a way to calculate the cumulative sum with numpy, but I want to reset the running value to zero (rather than carry it forward) whenever the cumulative sum is very close to zero and negative.
For instance
a = np.asarray([0, 4999, -5000, 1000])
np.cumsum(a)
returns [0, 4999, -1, 999]
but I'd like to set the [2]-value (-1) to zero during the calculation. The problem is that this decision can only be made during the calculation, as the intermediate result isn't known a priori.
The expected array is: [0, 4999, 0, 1000]
The reason for this is that I'm getting very small values (floating point, not integers as in the example) which are due to floating point calculations which should in reality be zero. Calculating the cumulative sum compounds those values which leads to errors.
The Kahan summation algorithm could solve the problem. Unfortunately, it is not implemented in numpy. This means a custom implementation is required:
def kahan_cumsum(x):
    x = np.asarray(x)
    cumulator = np.zeros_like(x)
    compensation = 0.0
    cumulator[0] = x[0]
    for i in range(1, len(x)):
        y = x[i] - compensation
        t = cumulator[i - 1] + y
        compensation = (t - cumulator[i - 1]) - y
        cumulator[i] = t
    return cumulator
I have to admit, this is not exactly what was asked for in the question. (A value of -1 at the 3rd output of the cumsum is correct in the example). However, I hope this solves the actual problem behind the question, which is related to floating point precision.
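Applied to the integers from the question, the compensated sum still (correctly) produces the -1, which is exactly the caveat above:
print(kahan_cumsum([0.0, 4999.0, -5000.0, 1000.0]))
# [   0. 4999.   -1.  999.]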
I wonder if rounding will do what you are asking for:
np.cumsum(np.around(a,-1))
# the -1 means it rounds to the nearest 10
gives
array([ 0, 5000, 0, 1000])
It is not exactly the expected array from your question, but using around, perhaps with the decimals parameter set to 0, might work when you apply it to the problem with floats.
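If a plain Python loop is fast enough for your array sizes, the reset rule from the question can also be written directly; a sketch (the eps threshold is an assumption you would tune to your data):
def cumsum_reset_near_zero(x, eps=1e-9):
    # Running sum that snaps to zero whenever it is negative but within eps of zero
    out = np.empty(len(x), dtype=float)
    total = 0.0
    for i, v in enumerate(x):
        total += v
        if -eps < total < 0.0:
            total = 0.0
        out[i] = total
    return out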
Probably the best way to go is to write this bit in Cython (name the file cumsum_eps.pyx):
cimport numpy as cnp
import numpy as np

cdef inline _cumsum_eps_f4(float *A, int ndim, int dims[], float *out, float eps):
    cdef float sum
    cdef size_t ofs
    N = 1
    for i in xrange(0, ndim - 1):
        N *= dims[i]
    ofs = 0
    for i in xrange(0, N):
        sum = 0
        for k in xrange(0, dims[ndim-1]):
            sum += A[ofs]
            if abs(sum) < eps:
                sum = 0
            out[ofs] = sum
            ofs += 1

def cumsum_eps_f4(cnp.ndarray[cnp.float32_t, mode='c'] A, shape, float eps):
    cdef cnp.ndarray[cnp.float32_t] _out
    cdef cnp.ndarray[cnp.int_t] _shape
    N = np.prod(shape)
    out = np.zeros(N, dtype=np.float32)
    _out = <cnp.ndarray[cnp.float32_t]> out
    _shape = <cnp.ndarray[cnp.int_t]> np.array(shape, dtype=np.int)
    _cumsum_eps_f4(&A[0], len(shape), <int*> &_shape[0], &_out[0], eps)
    return out.reshape(shape)

def cumsum_eps(A, axis=None, eps=np.finfo('float').eps):
    A = np.array(A)
    if axis is None:
        A = np.ravel(A)
    else:
        axes = list(xrange(len(A.shape)))
        axes[axis], axes[-1] = axes[-1], axes[axis]
        A = np.transpose(A, axes)
    if A.dtype == np.float32:
        out = cumsum_eps_f4(np.ravel(np.ascontiguousarray(A)), A.shape, eps)
    else:
        raise ValueError('Unsupported dtype')
    if axis is not None:
        out = np.transpose(out, axes)
    return out
then you can compile it like this (Windows, Visual C++ 2008 Command Line):
\Python27\Scripts\cython.exe cumsum_eps.pyx
cl /c cumsum_eps.c /IC:\Python27\include /IC:\Python27\Lib\site-packages\numpy\core\include
link /dll cumsum_eps.obj C:\Python27\libs\python27.lib /OUT:cumsum_eps.pyd
or like this (Linux use .so extension/Cygwin use .dll extension, gcc):
cython cumsum_eps.pyx
gcc -c cumsum_eps.c -o cumsum_eps.o -I/usr/include/python2.7 -I/usr/lib/python2.7/site-packages/numpy/core/include
gcc -shared cumsum_eps.o -o cumsum_eps.so -lpython2.7
and use like this:
from cumsum_eps import *
import numpy as np
x = np.array([[1,2,3,4], [5,6,7,8]], dtype=np.float32)
>>> print cumsum_eps(x)
[ 1. 3. 6. 10. 15. 21. 28. 36.]
>>> print cumsum_eps(x, axis=0)
[[ 1. 2. 3. 4.]
[ 6. 8. 10. 12.]]
>>> print cumsum_eps(x, axis=1)
[[ 1. 3. 6. 10.]
[ 5. 11. 18. 26.]]
>>> print cumsum_eps(x, axis=0, eps=1)
[[ 1. 2. 3. 4.]
[ 6. 8. 10. 12.]]
>>> print cumsum_eps(x, axis=0, eps=2)
[[ 0. 2. 3. 4.]
[ 5. 8. 10. 12.]]
>>> print cumsum_eps(x, axis=0, eps=3)
[[ 0. 0. 3. 4.]
[ 5. 6. 10. 12.]]
>>> print cumsum_eps(x, axis=0, eps=4)
[[ 0. 0. 0. 4.]
[ 5. 6. 7. 12.]]
>>> print cumsum_eps(x, axis=0, eps=8)
[[ 0. 0. 0. 0.]
[ 0. 0. 0. 8.]]
>>> print cumsum_eps(x, axis=1, eps=3)
[[ 0. 0. 3. 7.]
[ 5. 11. 18. 26.]]
and so on, of course normally eps would be some small value, here integers are used just for the sake of demonstration / easiness of typing.
If you need this for double as well the _f8 variants are trivial to write and another case has to be handled in cumsum_eps().
When you're happy with the implementation you should make it a proper part of your setup.py (see the Cython setup.py documentation).
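A minimal setup.py for that might look like this (a sketch; it assumes Cython and numpy are installed):
from setuptools import setup
from Cython.Build import cythonize
import numpy

setup(
    name='cumsum_eps',
    ext_modules=cythonize('cumsum_eps.pyx'),
    include_dirs=[numpy.get_include()],
)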
Update #1: If you have good compiler support in your runtime environment, you could try Theano to implement either the compensation algorithm or your original idea:
import numpy as np
import theano
import theano.tensor as T
from theano.ifelse import ifelse
A = T.vector('A')
sum = T.as_tensor_variable(np.asarray(0, dtype=np.float64))
res, upd = theano.scan(fn=lambda cur_sum, val: ifelse(T.lt(cur_sum + val, 1.0),
                                                      np.asarray(0, dtype=np.float64),
                                                      cur_sum + val),
                       outputs_info=sum, sequences=A)
f = theano.function(inputs=[A], outputs=res)
f([0.9, 2, 3, 4])
will give [0 2 3 4] as output. With either the Cython or the Theano version you get roughly native-code performance.
