Random walk pandas - python

I am trying to quickly create a simulated random walk series in pandas.
import pandas as pd
import numpy as np
dates = pd.date_range('2012-01-01', '2013-02-22')
y2 = np.random.randn(len(dates))/365
Y2 = pd.Series(y2, index=dates)
start_price = 100
would like to build another date series starting at start_price at beginning date and growing by the random growth rates.
pseudo code:
P0 = 100
P1 = 100 * exp(Y2)
P2 = P1 * exp(Y2)
very easy to do in excel, but I cant think of way of doing it without iterating over a dataframe/series with pandas and I also bump my head doing that.
have tried:
p = Y2.apply(np.exp)-1
y = p.cumsum(p)
y.plot()
this should give the cumulatively compound return since start

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def geometric_brownian_motion(T = 1, N = 100, mu = 0.1, sigma = 0.01, S0 = 20):
dt = float(T)/N
t = np.linspace(0, T, N)
W = np.random.standard_normal(size = N)
W = np.cumsum(W)*np.sqrt(dt) ### standard brownian motion ###
X = (mu-0.5*sigma**2)*t + sigma*W
S = S0*np.exp(X) ### geometric brownian motion ###
return S
dates = pd.date_range('2012-01-01', '2013-02-22')
T = (dates.max()-dates.min()).days / 365
N = dates.size
start_price = 100
y = pd.Series(
geometric_brownian_motion(T, N, sigma=0.1, S0=start_price), index=dates)
y.plot()
plt.show()

Related

Convert create.bspline.basis((rangval,nbasis,norder=norder,breaks=breaks) in R to BSpline(t, c, degree) in Python

I am trying to convert the function create.bspline.basis(rangval,nbasis,norder=norder,breaks=breaks) from R to Python.
I have tried using the BSpline(t, c , degree) function from scipy.interpolate but cannot seem the get the same results as I got in R.
Here is my R code:
library('fda')
df <- read.csv('data.csv', header = T)
df <- df[,1] # convert data frame to vector. Vector has a length of 1941.
rangval <- c(1, length(df))
breaks = seq(1,length(df),length.out=length(df)/60)
norder = 6
nbasis = length(breaks) - 2 + norder
bbasis = create.bspline.basis(rangval,nbasis,norder=norder,breaks=breaks)
plot(bbasis)
Here is my Python code:
from scipy.interpolate import BSpline
import matplotlib.pyplot as plt
import numpy as np
import math
Load Data files as data frames:
df = pd.read_csv('RData\Data.csv')
Convert data frames to arrays:
df = df.to_numpy()
breaks = np.linspace(1, (len(df)), math.ceil(len(df)/60))
k = math.ceil(len(df)) - 2
degree = 5
order = degree + 1
n = order + k
t = np.zeros(math.ceil(len(df)/60) + (2*order) # create an array to store knots.
t[:order] = 0
t[-:order] = len(df)
t[order:-order] = (breaks)
xx = np.arange(len(df))
for i in range(0,n):
c = np.zeros(n)
c[i] = 1
spl = BSpline(t,c,degree)
plt.plot(xx, spl(xx))
plt.show()
With the python code above I get the plot:
For my python code, I will like to have all the BSplines in a single object and not just be able to plot each BSpline one at a time. My goal is to use the full set of BSplines and it to pass it into another function to perform smoothing.
Basically, I am trying to follow the steps below but using Python:

function for counting number of oscillations

i'm trying to build a counter which would detect number of oscillations in a given data
i'm following a method where the slope of each point is calculated and based on negative and positive direction change
is there a preexisting function for this
i'm using the following code and i'm unable to leave out the cells with zero values after taking difference between each cell
import pandas as pd
import xlsxwriter
from asammdf import MDF
import numpy as np
dat = MDF("file_name.dat")
app = dat.get('variabe_name')
df = pd.DataFrame(app)
print(df)
data = df.loc[0, 0:]
#time step = T
T = 0.01
# Number of sample points
N = len(data)
# sample spacing
x = np.linspace(0.0, N*T, N, endpoint=False)
x1 = data.diff()
print(x1)
df1_1 = pd.DataFrame([x1])
df1_1 = df1_1.replace(0, np.nan)
df1_1 = df1_1.dropna(how='all', axis=0)
df1_1 = df1_1.dropna()
df1 = pd.DataFrame.transpose(df1_1)
df1.to_csv("output.csv")'
my data looks like this

Having problems with scipy.integrate.solve_ivp

I am trying to use scipy.integrate.solve_ivp to calculate the solutions to newton's gravitation equation for my n body simulation, however I am confused how the function is passed into solve_ivp. I have the following code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import solve_ivp
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
G = 6.67408e-11
m_sun = 1988500e24
m_jupiter = 1898.13e24
m_earth = 5.97219e24
au = 149597870.700e3
v_factor = 1731460
year = 31557600.e0
init_s = np.array([-6.534087946884256E-03*au, 6.100454846284101E-03*au, 1.019968145073305E-04*au, -6.938967653087248E-06*v_factor, -5.599052606952444E-06*v_factor, 2.173251724105919E-07*v_factor])
init_j = np.array([2.932487231769548E+00*au, -4.163444383137574E+00*au, -4.833604407653648E-02*au, 6.076788230491844E-03*v_factor, 4.702729516645153E-03*v_factor, -1.554436340872727E-04*v_factor])
variables_s = init_s
variables_j = init_j
N = 2
tStart = 0e0
t_End = 25*year
Nt = 2000
dt = t_End/Nt
temp_end = dt
t=tStart
domain = (t, temp_end)
planetsinit = np.vstack((init_s, init_j))
planetspos = planetsinit[:,0:3]
mass = np.vstack((1988500e24, 1898.13e24))
def weird_division(n, d):
return n / d if d else 0
variables_save = np.zeros((N,6,Nt))
variables_save[:,:,0] = planetsinit
pos_s = planetspos[0]
pos_j = planetspos[1]
while t < t_End:
t_index = int(weird_division(t, dt))
for index in range(len(planetspos)):
for otherindex in range(len(planetspos)):
if index != otherindex:
x1_p1, x2_p1, x3_p1 = planetsinit[index, 0:3]
x1_p2, x2_p2, x3_p2 = planetsinit[otherindex, 0:3]
m = mass[otherindex]
def f_grav(t, y):
x1_p1, x2_p1, x3_p1, v1_p1, v2_p1, v3_p1 = y
x1_diff = x1_p1 - x1_p2
x2_diff = x2_p1 - x2_p2
x3_diff = x3_p1 - x3_p2
dydt = [v1_p1,
v2_p1,
v3_p1,
-(x1_diff)*G*m/((x1_diff)**2+(x2_diff)**2+(x3_diff)**2)**(3/2),
-(x2_diff)*G*m/((x1_diff)**2+(x2_diff)**2+(x3_diff)**2)**(3/2),
-(x3_diff)*G*m/((x1_diff)**2+(x2_diff)**2+(x3_diff)**2)**(3/2)]
return dydt
solution = solve_ivp(fun=f_grav, t_span=domain, y0=planetsinit[index])
planetsinit[index] = solution['y'][0:6, -1]
variables_save[index,:,t_index] = solution['y'][0:6, -1]
planetspos[index] = planetsinit[index][0:3]
t += dt
temp_end += dt
domain = (t,temp_end)
pos_s = variables_save[0,0:3,:]
pos_j = variables_save[1,0:3,:]
plt.plot(variables_save[0,0:3,:][0], variables_save[0,0:3,:][1])
plt.plot(variables_save[1,0:3,:][0], variables_save[1,0:3,:][1])
The code above works very nicely and produces a stable orbit. However when I calculate the acceleration outside the function and feed that through into the f_grav function, something goes wrong and produces an orbit which is no longer stable. However I am perplexed as I don't know why the data is different as to be it seems like that I have passed through the exactly same inputs. Which leads me to think that maybe its the way the the function f_grav is interpolated by the solve_ivp integrator? To calculate the acceleration outside all I do is change the following code in the loop to:
x1_p1, x2_p1, x3_p1 = planetsinit[index, 0:3]
x1_p2, x2_p2, x3_p2 = planetsinit[otherindex, 0:3]
m = mass[otherindex]
x1_diff = x1_p1 - x1_p2
x2_diff = x2_p1 - x2_p2
x3_diff = x3_p1 - x3_p2
ax = -(x1_diff)*G*m/((x1_diff)**2+(x2_diff)**2+(x3_diff)**2)**(3/2)
ay = -(x2_diff)*G*m/((x1_diff)**2+(x2_diff)**2+(x3_diff)**2)**(3/2)
az = -(x3_diff)*G*m/((x1_diff)**2+(x2_diff)**2+(x3_diff)**2)**(3/2)
def f_grav(t, y):
x1_p1, x2_p1, x3_p1, v1_p1, v2_p1, v3_p1 = y
dydt = [v1_p1,
v2_p1,
v3_p1,
ax,
ay,
az]
return dydt
solution = solve_ivp(fun=f_grav, t_span=domain, y0=planetsinit[index])
planetsinit[index] = solution['y'][0:6, -1]
variables_save[index,:,t_index] = solution['y'][0:6, -1]
planetspos[index] = planetsinit[index][0:3]
As I said I don't know why different orbits are produces which are shown below and any hints as to why or how to solve it would me much appreciated. To clarify why I can't use the working code as it is, as when more bodies are involved I aim to sum the accelerations contribution of all the other planets which isn't possible this way where the acceleration is calculated in the function itself.
Sorry for the large coding chunks but I did feel it was appropriate as then it could be run and the problem itself is clearer.
Both have the same time period, dt, however the orbit on the left is stable and the one on the right is not

How to select paths from a Monte Carlo Simulation that meet a specified condition (Python)?

I am using geometric Brownian motion to simulate the path of stock prices. However, I want to be able to set a boundary value and select and count the paths that lie above the boundary. There are a lot of posts about Monte Carlo Simulations on this forum. But I cannot find how to select individual paths.
My code for the simulation is below. The paths are stored in the variable price_paths. The red line in the image is an example of the boundary B that I want to set. So for example, how can I select the paths that are above the line and never touch it, store them in a separate list or dataframe and then count them and do other calculations on them?
import numpy as np
from math import log, e
import matplotlib.pyplot as plt
from pandas_datareader import data
stock = 'AAPL' # Name of the stock
start = '2015/6/1'
T = 1
B = 355
apple = data.DataReader(stock, 'yahoo', start)
spot = apple["Adj Close"][-1]
apple['log_price'] = np.log(apple['Adj Close'])
apple['log_return'] = apple['log_price'].diff()
mu = apple.log_return.sum()/apple.log_return.count()
mu = mu*365 + 0.5*apple.log_return.var()*np.sqrt(365)
sigma = np.std(apple.log_price)
s0 = apple["Adj Close"][-1]
delta_t = 1/365
num_reps = 10000
steps = T/delta_t
plt.figure(figsize=(15,10))
closing_prices = []
for j in range(num_reps):
price_path = [s0] # The paths are stored here
st = s0
for i in range(int(steps)):
drift = (mu - 0.5 * sigma**2) * delta_t
diffusion = sigma * np.sqrt(delta_t) * np.random.normal(0, 1)
st = st*e**(drift + diffusion)
price_path.append(st)
closing_prices.append(price_path[-1])
plt.plot(price_path)
plt.ylabel('stock price',fontsize=15)
plt.xlabel('steps',fontsize=15)
plt.axhline(y = B, color = 'r', linestyle = '-') # Line to indicate the barrier B
plt.show()

2D interpolation with datetime format X values

I have a dataframe like this:
import pandas as pd
import numpy as np
time = pd.date_range('2018-05-14 00:00:00','2018-05-14 01:00:00',freq='5T')
mile = np.linspace(0,100,10)
x = list(time)*len(mile)
y = np.repeat(mile,len(time))
z = []
for i in range(0,10,1):
z.extend(np.random.normal(loc=i*5, scale=5, size=13))
origin_data = pd.DataFrame({'x':x, 'y':y ,'z':z})
origin_data contains original points' positions(x and y) and their values(z). I want to interpolate the z values at these new positions: x = pd.date_range('2018-05-14 00:00:00','2018-05-14 01:00:00',freq='1T') with y = np.linspace(0,91,1) just using bilinear interpolation.
I learned about the official document about scipy.interpolate.interp2d. But its x type is numeric, mine is datetime. Also, the tutorial's z values are calculated while mine are already given so I don't know how to handle the order of input z value. Could anyone give me an example that contains an interpolation result plot based on the dataframe I provided above? Thank you for your attention!
This is the way I found to this question:
import pandas as pd
import numpy as np
from scipy import interpolate
import itertools
time = pd.date_range('2018-05-14 00:00:00','2018-05-14 01:00:00',freq='5T')
mile = np.arange(0,100,10)
x = list(time)*len(mile)
y = np.repeat(mile,len(time))
z = []
for i in range(0,10,1):
z.extend(np.random.normal(loc=i*5, scale=5, size=13))
origin_data = pd.DataFrame({'x':x, 'y':y ,'z':z})
from ggplot import *
ggplot(aes(x = 'x', y = 'y', colour = 'z'), data = origin_data) +\
geom_point(size = 100) +\
scale_x_date(labels = date_format("%Y-%m-%d %H:%M:S"))
x_numeric = [x.timestamp() for x in origin_data['x']]
x_cors = pd.unique(x_numeric)
y_cors = pd.unique(origin_data['y'])
cors = list(itertools.product(x_cors,y_cors))
interp_func = interpolate.LinearNDInterpolator(cors, z)
interp_func = interpolate.CloughTocher2DInterpolator(cors, z)
new_x = [x.timestamp() for x in pd.date_range('2018-05-14 00:00:00','2018-05-14 01:00:00',freq='1T')]
new_y = np.arange(0,91,1)
new_cors = list(itertools.product(new_x,new_y))
new_z = interp_func(new_cors)
new_data = pd.DataFrame({'x':[x[0] for x in new_cors],
'y':[x[1] for x in new_cors],
'z':new_z})
import datetime
new_data['x'] = [pd.Timestamp(x,unit = 's') for x in new_data['x']]
ggplot(aes(x='x',y='y',colour='z'),data=new_data) +\
geom_point(size=100) +\
scale_x_date(labels = date_format("%Y-%m-%d %H:%M:S"))

Categories