How to calculate the Sharpe ratio over different time intervals? - python

import pandas as pd
import numpy as np

bt_dict = {
    'position_strategy1': df1_1hour,
    'position_strategy2': df2_6hour
}

def backtest(bt_dict):
    # ohlc_df is one-hour timeframe data (defined elsewhere)
    ohlc_df['date'] = pd.to_datetime(ohlc_df['date'])
    ohlc_df.set_index('date', inplace=True)
    all_df = pd.DataFrame(index=ohlc_df.index)
    all_df['close'] = ohlc_df['close']
    for strategy_name, strategy_df in bt_dict.items():
        bt_dict[strategy_name] = strategy_df[['date', 'position']].rename(
            columns={"position": f"position_{strategy_name}"}).dropna()
        bt_dict[strategy_name]['date'] = pd.to_datetime(bt_dict[strategy_name]['date'])
        bt_dict[strategy_name].set_index('date', inplace=True)
        all_df[f'position_{strategy_name}'] = bt_dict[strategy_name]
    all_df = all_df.fillna(method='ffill')
    all_df['position'] = all_df['position_strategy1'] * 0.6 + \
                         all_df['position_strategy2'] * 0.4
    all_df = all_df.dropna()
    all_df['pnl'] = all_df['position'].shift(1) * (all_df['close'] / all_df['close'].shift(1) - 1)
    sharpe_ratio = all_df['pnl'].mean() / all_df['pnl'].std() * np.sqrt(365 * 24)
    return sharpe_ratio
For example, I have two strategies, one on a 1-hour data frame and one on a 6-hour data frame, and I want to combine them and calculate the Sharpe ratio.
I have tried to calculate across multiple timeframes, but the result was wrong.
I hope to find the right way to calculate the Sharpe ratio across different timeframes.
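In case it helps, here is a minimal sketch of one way to line the two timeframes up (an assumption on my part, not verified against your data): forward-fill each strategy's positions onto the hourly close index, so the PnL of both strategies is always computed on hourly returns, and annualize with the number of hourly bars per year.

import numpy as np
import pandas as pd

def backtest_aligned(bt_dict, ohlc_df,
                     weights={'position_strategy1': 0.6, 'position_strategy2': 0.4}):
    # Assumes ohlc_df holds one-hour bars with 'date' and 'close' columns.
    ohlc_df = ohlc_df.copy()
    ohlc_df['date'] = pd.to_datetime(ohlc_df['date'])
    all_df = ohlc_df.set_index('date')[['close']]
    for name, strategy_df in bt_dict.items():
        pos = strategy_df[['date', 'position']].dropna().copy()
        pos['date'] = pd.to_datetime(pos['date'])
        # Hold each (possibly 6-hourly) position until the next one appears.
        all_df[name] = pos.set_index('date')['position'].reindex(all_df.index, method='ffill')
    all_df = all_df.dropna()
    all_df['position'] = sum(all_df[name] * w for name, w in weights.items())
    # Previous hour's position times this hour's simple return.
    all_df['pnl'] = all_df['position'].shift(1) * all_df['close'].pct_change()
    # Annualize with hourly bars; 365 * 24 assumes a 24/7 (e.g. crypto) market.
    return all_df['pnl'].mean() / all_df['pnl'].std() * np.sqrt(365 * 24)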

Related

Fast numpy operation on part of dataframe

I have a pandas dataframe with several columns. Two of them are date and time, and the others are numerical.
I need to perform fast in-place calculation on the numerical part of the dataframe. Currently I ignore the first 2 columns, convert the numerical columns to a NumPy array, and use that array further down in the code.
However, I want to keep these processed numerical values in the dataframe without touching date and time.
Now:
# tanh norm
def tanh_ret():
    data = df.to_numpy()
    mu = np.mean(data)
    std = np.std(data)
    return 0.5 * (np.tanh(0.01 * ((data - mu) / std)) + 1)

del df['Date']
del df['Time']
nums = tanh_ret()
del df
What I want: normalize 3 of the 5 df columns in-place.
Mind that the dataset is large, so I would prefer as little data copying as possible, while staying reasonably fast.
Create a random pandas dataframe
I consider 5 columns of random values; you can put whatever you want there. The Time and Date columns are set to a constant value.
import datetime as dt
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.random((100,5)))
now = dt.datetime.now()
df['Time'] = now.strftime('%H:%M:%S')
df['Date'] = now.strftime('%m/%d/%Y')
In-place numerical processing
def tanh_ret(data):
    mu = data.mean()
    std = data.std()
    return 0.5 * (np.tanh(0.01 * ((data - mu) / std)) + 1)

num_cols = df.columns[df.dtypes != 'object']
df[num_cols] = df[num_cols].transform(tanh_ret)
Alternatively:
tan_map = {col: tanh_ret for col in num_cols}
df[num_cols] = df.transform(tan_map)

Function for counting the number of oscillations

I'm trying to build a counter that would detect the number of oscillations in given data.
I'm following a method where the slope at each point is calculated, and oscillations are detected from the change between negative and positive direction.
Is there a preexisting function for this?
I'm using the following code, and I'm unable to leave out the cells with zero values after taking the difference between each cell:
import pandas as pd
import xlsxwriter
from asammdf import MDF
import numpy as np

dat = MDF("file_name.dat")
app = dat.get('variable_name')
df = pd.DataFrame(app)
print(df)

data = df.loc[0, 0:]
# time step = T
T = 0.01
# Number of sample points
N = len(data)
# sample spacing
x = np.linspace(0.0, N*T, N, endpoint=False)
x1 = data.diff()
print(x1)

df1_1 = pd.DataFrame([x1])
df1_1 = df1_1.replace(0, np.nan)
df1_1 = df1_1.dropna(how='all', axis=0)
df1_1 = df1_1.dropna()
df1 = pd.DataFrame.transpose(df1_1)
df1.to_csv("output.csv")
My data looks like this:
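On the "is there a preexisting function" part: scipy.signal.find_peaks is one ready-made option, and counting sign changes of the nonzero differences handles the zero-value cells directly. A sketch (assuming data is the 1-D Series extracted above):

import numpy as np
from scipy.signal import find_peaks

values = data.to_numpy(dtype=float)

# Option 1: a preexisting function — count peaks and troughs.
peaks, _ = find_peaks(values)
troughs, _ = find_peaks(-values)
oscillations = (len(peaks) + len(troughs)) / 2  # one peak + one trough per full cycle

# Option 2: count slope-direction changes, skipping the zero-difference cells.
slope = np.diff(values)
slope = slope[slope != 0]
direction_changes = np.count_nonzero(np.sign(slope[1:]) != np.sign(slope[:-1]))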

Getting my def function to .apply() to my stocks

I'm having trouble getting my code to work. I'm coding Python in a backtesting environment called "Quantopian". Regardless, the .apply(), Series, pd., or whatever terminology is beyond my skill level. (Assuming I'm even on the right track, lol.)
What I'm trying to accomplish:
Taking a couple stocks and constantly calculating the MACD. Then when the indicator meets a certain condition, the algo purchases or sells that specific stock.
What the MACD is simplistically:
A momentum indicator that looks at historical data, using 12, 26 and 9 day Exponential Moving Averages and comparing them with each other.
I've designed my own function, that's not my problem....
Help:
I'm trying to apply it to the pool of stocks in my universe to constantly calculate the MACD every minute.
Where I'm specifically confused:
I defined a MACD function but don't know how to get it to calculate every minute for whatever stocks are in my pool.
CODE:
import numpy as np
import math
import talib as ta
import pandas as pd

def initialize(context):
    set_commission(commission.PerTrade(cost=10))
    context.stocks = symbols('AAPL', 'GOOG_L')

def handle_data(context, data):
    for stock in context.stocks:
        prices_fast = data.history(context.stocks, "close", 390, "1m").resample("30min").dropna()
        prices_slow = data.history(context.stocks, "close", 390, "1m").resample("30min").dropna()
        prices_signal = data.history(context.stocks, "close", 390, "1m").resample("30min").dropna()
        curr_price = data.history(context.stocks, "price", 30, "1m").resample("30min")[-1:].dropna()
        series = pd.Series([stock]).dropna()
        macd = series.apply(MACD)
        macd_func = stock.apply(MACD)
        if macd_func[stock] > 0:
            order(stock, 1)
        print macd_func
        record(macd=macd_func[stock])

def MACD(prices_fast, prices_slow, prices_signal, curr_price):
    # Setting MACD Conditions:
    slow = 26
    fast = 12
    signal = 9
    # Calculating Averages:
    avg_fast = pd.rolling_sum(prices_fast[:fast], fast)[-1:] / fast
    avg_slow = pd.rolling_sum(prices_slow[:slow], slow)[-1:] / slow
    avg_signal = pd.rolling_sum(prices_signal[:signal], signal)[-1:] / signal
    # Calculating the Weighting Multipliers:
    A = 2 / (fast + 1)
    B = 2 / (slow + 1)
    C = 2 / (signal + 1)
    # Calculating the Exponential Moving Averages:
    EMA_fast = (curr_price * A) + [avg_fast * (1 - A)]
    EMA_slow = (curr_price * B) + [avg_slow * (1 - B)]
    EMA_signal = (curr_price * C) + [avg_signal * (1 - C)]
    # Calculating MACD Histogram:
    macd = EMA_fast - EMA_slow - EMA_signal
If someone could give me a handle, I would GREATLY appreciate it!
Thank you very VERY much,
Mike
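Not a full rewrite of your function, but here is a sketch of the usual pattern (my assumptions: Quantopian's data.history(asset, field, bar_count, frequency) signature, and the talib MACD function that your import talib as ta already provides). The key change is to fetch history per stock inside the loop and pass a plain array to talib, instead of calling .apply() on a Series built from the equity object:

def handle_data(context, data):
    for stock in context.stocks:
        # Three days of minute bars -> ~39 half-hour bars, enough for the 26-period EMA.
        prices = data.history(stock, "close", 390 * 3, "1m").resample("30min").last().dropna()
        # talib returns the MACD line, the signal line and the histogram as arrays.
        macd_line, signal_line, hist = ta.MACD(prices.values,
                                               fastperiod=12, slowperiod=26, signalperiod=9)
        if not np.isnan(hist[-1]) and hist[-1] > 0:
            order(stock, 1)
        record(macd=hist[-1])  # as in your original, one recorded value per bar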

Relative Strength Index in python pandas

I am new to pandas. What is the best way to calculate the relative strength part in the RSI indicator in pandas? So far I got the following:
from pylab import *
import pandas as pd
import numpy as np

def Datapull(Stock):
    try:
        df = (pd.io.data.DataReader(Stock, 'yahoo', start='01/01/2010'))
        return df
        print 'Retrieved', Stock
        time.sleep(5)
    except Exception, e:
        print 'Main Loop', str(e)

def RSIfun(price, n=14):
    delta = price['Close'].diff()
    #-----------
    dUp=
    dDown=

    RolUp = pd.rolling_mean(dUp, n)
    RolDown = pd.rolling_mean(dDown, n).abs()

    RS = RolUp / RolDown
    rsi = 100.0 - (100.0 / (1.0 + RS))
    return rsi

Stock = 'AAPL'
df = Datapull(Stock)
RSIfun(df)
Am I doing it correctly so far? I am having trouble with the difference part of the equation where you separate out upward and downward calculations
It is important to note that there are various ways of defining the RSI. It is commonly defined in at least two ways: using a simple moving average (SMA) as above, or using an exponential moving average (EMA). Here's a code snippet that calculates various definitions of RSI and plots them for comparison. I'm discarding the first row after taking the difference, since it is always NaN by definition.
Note that when using EMA one has to be careful: since it includes a memory going back to the beginning of the data, the result depends on where you start! For this reason, typically people will add some data at the beginning, say 100 time steps, and then cut off the first 100 RSI values.
In the plot below, one can see the difference between the RSI calculated using SMA and EMA: the SMA one tends to be more sensitive. Note that the RSI based on EMA has its first finite value at the first time step (which is the second time step of the original period, due to discarding the first row), whereas the RSI based on SMA has its first finite value at the 14th time step. This is because by default rolling_mean() only returns a finite value once there are enough values to fill the window.
import datetime
from typing import Callable

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader.data as web

# Window length for moving average
length = 14

# Dates
start, end = '2010-01-01', '2013-01-27'

# Get data
data = web.DataReader('AAPL', 'yahoo', start, end)
# Get just the adjusted close
close = data['Adj Close']

# Define function to calculate the RSI
def calc_rsi(over: pd.Series, fn_roll: Callable) -> pd.Series:
    # Get the difference in price from previous step
    delta = over.diff()
    # Get rid of the first row, which is NaN since it did not have a previous row to calculate the differences
    delta = delta[1:]

    # Make the positive gains (up) and negative gains (down) Series
    up, down = delta.clip(lower=0), delta.clip(upper=0).abs()

    roll_up, roll_down = fn_roll(up), fn_roll(down)
    rs = roll_up / roll_down
    rsi = 100.0 - (100.0 / (1.0 + rs))

    # Avoid division-by-zero if `roll_down` is zero
    # This prevents inf and/or nan values.
    rsi[:] = np.select([roll_down == 0, roll_up == 0, True], [100, 0, rsi])
    rsi.name = 'rsi'

    # Assert range
    valid_rsi = rsi[length - 1:]
    assert ((0 <= valid_rsi) & (valid_rsi <= 100)).all()
    # Note: rsi[:length - 1] is excluded from the above assertion because it is NaN for SMA.

    return rsi

# Calculate RSI using MA of choice
# Reminder: Provide ≥ `1 + length` extra data points!
rsi_ema = calc_rsi(close, lambda s: s.ewm(span=length).mean())
rsi_sma = calc_rsi(close, lambda s: s.rolling(length).mean())
rsi_rma = calc_rsi(close, lambda s: s.ewm(alpha=1 / length).mean())  # Approximates TradingView.

# Compare graphically
plt.figure(figsize=(8, 6))
rsi_ema.plot(), rsi_sma.plot(), rsi_rma.plot()
plt.legend(['RSI via EMA/EWMA', 'RSI via SMA', 'RSI via RMA/SMMA/MMA (TradingView)'])
plt.show()
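To act on the warm-up caveat above, one pattern (illustrative only: close_extended is a hypothetical series fetched with ~100 extra rows before start) is to compute the RSI on the extended series and slice off the warm-up values:

# close_extended is assumed to hold ~100 extra rows before `start`,
# so the EMA memory has settled by the slice point.
warmup = 100
rsi_ema = calc_rsi(close_extended, lambda s: s.ewm(span=length).mean())[warmup:]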
dUp = delta[delta > 0]
dDown = delta[delta < 0]

You also need something like:

RolUp = RolUp.reindex_like(delta, method='ffill')
RolDown = RolDown.reindex_like(delta, method='ffill')

otherwise RS = RolUp / RolDown will not do what you desire.
Edit: it seems this is a more accurate way of calculating RS:

# dUp = delta[delta > 0]
# dDown = delta[delta < 0]
# dUp = dUp.reindex_like(delta, fill_value=0)
# dDown = dDown.reindex_like(delta, fill_value=0)
dUp, dDown = delta.copy(), delta.copy()
dUp[dUp < 0] = 0
dDown[dDown > 0] = 0

RolUp = pd.rolling_mean(dUp, n)
RolDown = pd.rolling_mean(dDown, n).abs()

RS = RolUp / RolDown
My answer is tested on StockCharts sample data.
StockChart RSI info
def RSI(series, period):
    delta = series.diff().dropna()
    u = delta * 0
    d = u.copy()
    u[delta > 0] = delta[delta > 0]
    d[delta < 0] = -delta[delta < 0]
    u[u.index[period-1]] = np.mean(u[:period])  # first value is the average of the first `period` gains
    u = u.drop(u.index[:(period-1)])
    d[d.index[period-1]] = np.mean(d[:period])  # first value is the average of the first `period` losses
    d = d.drop(d.index[:(period-1)])
    rs = pd.DataFrame.ewm(u, com=period-1, adjust=False).mean() / \
         pd.DataFrame.ewm(d, com=period-1, adjust=False).mean()
    return 100 - 100 / (1 + rs)

# sample data from StockCharts
data = pd.Series([44.34, 44.09, 44.15, 43.61,
                  44.33, 44.83, 45.10, 45.42,
                  45.84, 46.08, 45.89, 46.03,
                  45.61, 46.28, 46.28, 46.00,
                  46.03, 46.41, 46.22, 45.64])
print(RSI(data, 14))
#output
14 70.464135
15 66.249619
16 66.480942
17 69.346853
18 66.294713
19 57.915021
I too had this question and was working down the rolling_apply path that Jev took. However, when I tested my results, they didn't match up against the commercial stock charting programs I use, such as StockCharts.com or thinkorswim. So I did some digging and discovered that when Welles Wilder created the RSI, he used a smoothing technique now referred to as Wilder Smoothing. The commercial services above use Wilder Smoothing rather than a simple moving average to calculate the average gains and losses.
I'm new to Python (and Pandas), so I'm wondering if there's some brilliant way to refactor out the for loop below to make it faster. Maybe someone else can comment on that possibility.
I hope you find this useful.
More info here.
def get_rsi_timeseries(prices, n=14):
    # RSI = 100 - (100 / (1 + RS))
    # where RS = (Wilder-smoothed n-period average of gains / Wilder-smoothed n-period average of -losses)
    # Note that losses above should be positive values
    # Wilder-smoothing = ((previous smoothed avg * (n-1)) + current value to average) / n
    # For the very first "previous smoothed avg" (aka the seed value), we start with a straight average.
    # Therefore, our first RSI value will be for the n+2nd period:
    #     0: first delta is nan
    #     1:
    #     ...
    #     n: lookback period for first Wilder smoothing seed value
    #     n+1: first RSI

    # First, calculate the gain or loss from one price to the next. The first value is nan so replace with 0.
    deltas = (prices - prices.shift(1)).fillna(0)

    # Calculate the straight average seed values.
    # The first delta is always zero, so we will use a slice of the first n deltas starting at 1,
    # and filter only deltas > 0 to get gains and deltas < 0 to get losses
    avg_of_gains = deltas[1:n+1][deltas > 0].sum() / n
    avg_of_losses = -deltas[1:n+1][deltas < 0].sum() / n

    # Set up pd.Series container for RSI values
    rsi_series = pd.Series(0.0, deltas.index)

    # Now calculate RSI using the Wilder smoothing method, starting with n+1 delta.
    up = lambda x: x if x > 0 else 0
    down = lambda x: -x if x < 0 else 0
    i = n+1
    for d in deltas[n+1:]:
        avg_of_gains = ((avg_of_gains * (n-1)) + up(d)) / n
        avg_of_losses = ((avg_of_losses * (n-1)) + down(d)) / n
        if avg_of_losses != 0:
            rs = avg_of_gains / avg_of_losses
            rsi_series[i] = 100 - (100 / (1 + rs))
        else:
            rsi_series[i] = 100
        i += 1

    return rsi_series
You can use rolling_apply in combination with a subfunction to make a clean function like this:
def rsi(price, n=14):
    ''' rsi indicator '''
    gain = (price - price.shift(1)).fillna(0)  # calculate price gain with previous day, first row nan is filled with 0

    def rsiCalc(p):
        # subfunction for calculating rsi for one lookback period
        avgGain = p[p > 0].sum() / n
        avgLoss = -p[p < 0].sum() / n
        rs = avgGain / avgLoss
        return 100 - 100 / (1 + rs)

    # run for all periods with rolling_apply
    return pd.rolling_apply(gain, n, rsiCalc)
# Relative Strength Index
# Avg(PriceUp) / (Avg(PriceUp) + Avg(PriceDown)) * 100
# Where: PriceUp(t) = 1*(Price(t)-Price(t-1)) if Price(t)-Price(t-1) > 0;
#        PriceDown(t) = -1*(Price(t)-Price(t-1)) if Price(t)-Price(t-1) < 0;
# Change the formula for your own requirement
def rsi(values):
    up = values[values > 0].mean()
    down = -1 * values[values < 0].mean()
    return 100 * up / (up + down)

stock['RSI_6D'] = stock['Momentum_1D'].rolling(center=False, window=6).apply(rsi)
stock['RSI_12D'] = stock['Momentum_1D'].rolling(center=False, window=12).apply(rsi)
Momentum_1D = P(t) - P(t-1), where P is the closing price and t is the date.
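Momentum_1D itself is not defined in the snippet; presumably it is the one-day price difference, something like this (a guess based on the formula above, assuming stock has a 'Close' column):

stock['Momentum_1D'] = stock['Close'].diff().fillna(0)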
You can get a massive speed up of Bill's answer by using numba. 100 loops of a 20k-row series (regular = 113 seconds, numba = 0.28 seconds). Numba excels with loops and arithmetic.
import numpy as np
import numba as nb

@nb.jit(fastmath=True, nopython=True)
def calc_rsi(array, deltas, avg_gain, avg_loss, n):
    # Use Wilder smoothing method
    up = lambda x: x if x > 0 else 0
    down = lambda x: -x if x < 0 else 0
    i = n + 1
    for d in deltas[n+1:]:
        avg_gain = ((avg_gain * (n - 1)) + up(d)) / n
        avg_loss = ((avg_loss * (n - 1)) + down(d)) / n
        if avg_loss != 0:
            rs = avg_gain / avg_loss
            array[i] = 100 - (100 / (1 + rs))
        else:
            array[i] = 100
        i += 1
    return array

def get_rsi(array, n=14):
    deltas = np.append([0], np.diff(array))
    avg_gain = np.sum(deltas[1:n+1].clip(min=0)) / n
    avg_loss = -np.sum(deltas[1:n+1].clip(max=0)) / n
    array = np.empty(deltas.shape[0])
    array.fill(np.nan)
    array = calc_rsi(array, deltas, avg_gain, avg_loss, n)
    return array

rsi = get_rsi(array_or_series, 14)  # pass your price array or Series here
def rsi_indicator(close, n_days):
    rsi_series = pd.DataFrame(close)

    # Change = close[i] - close[i-1]
    rsi_series["Change"] = (rsi_series["Close"] - rsi_series["Close"].shift(1)).fillna(0)

    # Upward Movement
    rsi_series["Upward Movement"] = rsi_series["Change"][rsi_series["Change"] > 0]
    rsi_series["Upward Movement"] = rsi_series["Upward Movement"].fillna(0)

    # Downward Movement
    rsi_series["Downward Movement"] = abs(rsi_series["Change"])[rsi_series["Change"] < 0]
    rsi_series["Downward Movement"] = rsi_series["Downward Movement"].fillna(0)

    # Average Upward Movement
    # The first value is the mean of the first n_days elements.
    rsi_series["Average Upward Movement"] = 0.00
    rsi_series["Average Upward Movement"][n_days] = rsi_series["Upward Movement"][1:n_days+1].mean()
    # From the second value onwards, use Wilder smoothing.
    for i in range(n_days+1, len(rsi_series), 1):
        rsi_series["Average Upward Movement"][i] = (rsi_series["Average Upward Movement"][i-1]*(n_days-1) + rsi_series["Upward Movement"][i]) / n_days

    # Average Downward Movement
    # The first value is the mean of the first n_days elements.
    rsi_series["Average Downward Movement"] = 0.00
    rsi_series["Average Downward Movement"][n_days] = rsi_series["Downward Movement"][1:n_days+1].mean()
    # From the second value onwards, use Wilder smoothing.
    for i in range(n_days+1, len(rsi_series), 1):
        rsi_series["Average Downward Movement"][i] = (rsi_series["Average Downward Movement"][i-1]*(n_days-1) + rsi_series["Downward Movement"][i]) / n_days

    # Relative Strength
    rsi_series["Relative Strength"] = (rsi_series["Average Upward Movement"] / rsi_series["Average Downward Movement"]).fillna(0)

    # RSI
    rsi_series["RSI"] = 100 - 100 / (rsi_series["Relative Strength"] + 1)

    return rsi_series.round(2)
You can do this using the finta package as well, just to add to the above.
ref: https://github.com/peerchemist/finta/tree/master/examples

import pandas as pd
from finta import TA
import matplotlib.pyplot as plt

ohlc = pd.read_csv("C:\\WorkSpace\\Python\\ta-lib\\intraday_5min_IBM.csv", index_col="timestamp", parse_dates=True)
ohlc['RSI'] = TA.RSI(ohlc)
It is not really necessary to calculate the mean, because after the two are divided by each other only the sums matter, so we can use Series.cumsum:

def rsi(serie, n):
    diff_serie = serie.diff()
    cumsum_incr = diff_serie.where(lambda x: x.gt(0), 0).cumsum()
    cumsum_decr = diff_serie.where(lambda x: x.lt(0), 0).abs().cumsum()
    rs_serie = cumsum_incr.div(cumsum_decr)
    rsi = rs_serie.mul(100).div(rs_serie.add(1)).fillna(0)
    return rsi
Less code here but seems to work for me:
df['Change'] = (df['Close'].shift(-1)-df['Close']).shift(1)
df['ChangeAverage'] = df['Change'].rolling(window=2).mean()
df['ChangeAverage+'] = df.apply(lambda x: x['ChangeAverage'] if x['ChangeAverage'] > 0 else 0,axis=1).rolling(window=14).mean()
df['ChangeAverage-'] = df.apply(lambda x: x['ChangeAverage'] if x['ChangeAverage'] < 0 else 0,axis=1).rolling(window=14).mean()*-1
df['RSI'] = 100-(100/(1+(df['ChangeAverage+']/df['ChangeAverage-'])))

Random walk pandas

I am trying to quickly create a simulated random walk series in pandas.
import pandas as pd
import numpy as np
dates = pd.date_range('2012-01-01', '2013-02-22')
y2 = np.random.randn(len(dates))/365
Y2 = pd.Series(y2, index=dates)
start_price = 100
I would like to build another series that starts at start_price on the first date and grows by the random growth rates.
pseudo code:
P0 = 100
P1 = 100 * exp(Y2)
P2 = P1 * exp(Y2)
It is very easy to do in Excel, but I can't think of a way of doing it without iterating over a dataframe/series with pandas, and I bump my head even doing that.
I have tried:
p = Y2.apply(np.exp)-1
y = p.cumsum(p)
y.plot()
This should give the cumulatively compounded return since the start.
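For the pseudo code above there is a fully vectorised route: since P(t) = start_price * exp(Y2_1 + ... + Y2_t), you can cumulatively sum the growth rates and exponentiate, using only the names already defined:

# Compound each day's growth rate without any explicit loop.
prices = start_price * np.exp(Y2.cumsum())
prices.plot()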
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def geometric_brownian_motion(T=1, N=100, mu=0.1, sigma=0.01, S0=20):
    dt = float(T) / N
    t = np.linspace(0, T, N)
    W = np.random.standard_normal(size=N)
    W = np.cumsum(W) * np.sqrt(dt)  ### standard brownian motion ###
    X = (mu - 0.5 * sigma**2) * t + sigma * W
    S = S0 * np.exp(X)  ### geometric brownian motion ###
    return S

dates = pd.date_range('2012-01-01', '2013-02-22')
T = (dates.max() - dates.min()).days / 365
N = dates.size
start_price = 100
y = pd.Series(
    geometric_brownian_motion(T, N, sigma=0.1, S0=start_price), index=dates)
y.plot()
plt.show()
