I am working on backtesting code for a maximum-Sharpe portfolio.
Here I am using PyPortfolioOpt instead of cvxpy to compute the weights; however, I am confused about where to configure the number of months for the lookback period of this optimization, e.g. a rolling 36-month optimization.
I believe the lookback window should be set here, but I am not quite sure how.
def pre_segment_func_nb(c, find_weights_nb, history_len, ann_factor, num_tests, srb_sharpe):
    if history_len == -1:
        # Look back at the entire time period
        close = c.close[:c.i, c.from_col:c.to_col]
    else:
        # Look back at a fixed time period
        if c.i - history_len <= 0:
            return (np.full(c.group_len, np.nan),)  # insufficient data
        close = c.close[c.i - history_len:c.i, c.from_col:c.to_col]
Here is the full code FYR.
import os
import numpy as np
import pandas as pd
import yfinance as yf
from datetime import datetime
import pytz
from numba import njit
import matplotlib.pyplot as plt
import seaborn as sns
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt import base_optimizer
from pypfopt import objective_functions
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices
from pypfopt import EfficientSemivariance
from pypfopt.expected_returns import mean_historical_return
from pypfopt.expected_returns import returns_from_prices
import vectorbt as vbt
from vectorbt.generic.nb import nanmean_nb
from vectorbt.portfolio.nb import order_nb, sort_call_seq_nb
from vectorbt.portfolio.enums import SizeType, Direction
from IPython.display import set_matplotlib_formats
%matplotlib inline
%config InlineBackend.figure_format='retina'
symbols = ['NVDA','AMD','TSLA','NET','JPM','AAPL']
start_date = datetime(2012, 1, 1, tzinfo=pytz.utc)
end_date = datetime.today().strftime('%Y-%m-%d')
num_tests = 1000000
vbt.settings.array_wrapper['freq'] = 'days'
vbt.settings.returns['year_freq'] = '252 days'
vbt.settings.portfolio['seed'] = 42
vbt.settings.portfolio.stats['incl_unrealized'] = True
yfdata = vbt.YFData.download(symbols, start=start_date, end=end_date)
ohlcv = yfdata.concat()
price = ohlcv['Close'].fillna(method='ffill')
srb_sharpe = np.full(price.shape[0], np.nan)
@njit
def pre_sim_func_nb(c, every_nth):
    # Define rebalancing days
    c.segment_mask[:, :] = False
    c.segment_mask[every_nth::every_nth, :] = True
    return ()
@njit
def find_weights_nb(c, price, num_tests):
    # Find optimal weights based on best Sharpe ratio
    returns = (price[1:] - price[:-1]) / price[:-1]
    returns = returns[1:, :]  # cannot compute np.cov with NaN
    mean = nanmean_nb(returns)
    cov = np.cov(returns, rowvar=False)  # masked arrays not supported by Numba (yet)
    best_sharpe_ratio = -np.inf
    weights = np.full(c.group_len, np.nan, dtype=np.float_)
    for i in range(num_tests):
        # Generate random weights
        w = np.random.random_sample(c.group_len)
        w = w / np.sum(w)
        # Compute annualized return, volatility, and Sharpe ratio
        p_return = np.sum(mean * w) * ann_factor
        p_std = np.sqrt(np.dot(w.T, np.dot(cov, w))) * np.sqrt(ann_factor)
        sharpe_ratio = p_return / p_std
        if sharpe_ratio > best_sharpe_ratio:
            best_sharpe_ratio = sharpe_ratio
            weights = w
    return best_sharpe_ratio, weights
@njit
def pre_segment_func_nb(c, find_weights_nb, history_len, ann_factor, num_tests, srb_sharpe):
    if history_len == -1:
        # Look back at the entire time period
        close = c.close[:c.i, c.from_col:c.to_col]
    else:
        # Look back at a fixed time period
        if c.i - history_len <= 0:
            return (np.full(c.group_len, np.nan),)  # insufficient data
        close = c.close[c.i - history_len:c.i, c.from_col:c.to_col]
    # Find optimal weights
    best_sharpe_ratio, weights = find_weights_nb(c, close, num_tests)
    srb_sharpe[c.i] = best_sharpe_ratio
    # Update valuation price and reorder orders
    size_type = SizeType.TargetPercent
    direction = Direction.LongOnly
    order_value_out = np.empty(c.group_len, dtype=np.float_)
    for k in range(c.group_len):
        col = c.from_col + k
        c.last_val_price[col] = c.close[c.i, col]
    sort_call_seq_nb(c, weights, size_type, direction, order_value_out)
    return (weights,)
@njit
def order_func_nb(c, weights):
    col_i = c.call_seq_now[c.call_idx]
    return order_nb(
        weights[col_i],
        c.close[c.i, c.col],
        size_type=SizeType.TargetPercent
    )
returns = price.pct_change()  # daily returns, used only to derive the annualization factor
ann_factor = returns.vbt.returns.ann_factor
def pyopt_find_weights(sc, price, num_tests):  # no @njit decorator = it's a pure Python function
    price = pd.DataFrame(price, columns=symbols)
    avg_returns = expected_returns.mean_historical_return(price)
    cov_mat = risk_models.CovarianceShrinkage(price).ledoit_wolf()
    ef = EfficientFrontier(avg_returns, cov_mat, weight_bounds=(0, 1))
    min_weight, max_weight = 0.05, 0.35
    constraints = [
        # {"type": "eq", "fun": lambda w: np.sum(w) - 1},  # sum to 1
        {"type": "ineq", "fun": lambda w: w - min_weight},  # greater than min_weight
        {"type": "ineq", "fun": lambda w: max_weight - w},  # less than max_weight
    ]
    weights = ef.nonconvex_objective(
        objective_functions.sharpe_ratio,
        objective_args=(avg_returns, cov_mat),
        weights_sum_to_one=True,
        constraints=constraints
    )
    clean_weights = ef.clean_weights()
    weights = np.array([clean_weights[symbol] for symbol in symbols])
    best_sharpe_ratio = base_optimizer.portfolio_performance(weights, avg_returns, cov_mat)[2]
    latest_prices = get_latest_prices(price)
    da = DiscreteAllocation(clean_weights, latest_prices, total_portfolio_value=25000)
    allocation, leftover = da.lp_portfolio(reinvest=True)
    w = pd.DataFrame(clean_weights, columns=clean_weights.keys(), index=[0])
    w.to_excel('w.xlsx')
    return best_sharpe_ratio, weights
pyopt_srb_sharpe = np.full(price.shape[0], np.nan)
pyopt_srb_pf = vbt.Portfolio.from_order_func(
    price,
    order_func_nb,
    pre_sim_func_nb=pre_sim_func_nb,
    pre_sim_args=(63,),  # 63 #84
    pre_segment_func_nb=pre_segment_func_nb.py_func,  # run pre_segment_func_nb as a pure Python function
    pre_segment_args=(pyopt_find_weights, -1, ann_factor, num_tests, pyopt_srb_sharpe),
    cash_sharing=True,
    group_by=True,
    use_numba=False  # run simulate_nb as a pure Python function
)
Greatly appreciated.
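If I read pre_segment_func_nb correctly, the lookback is the history_len argument passed positionally through pre_segment_args, and it is measured in bars (rows of price), not months; -1 means "use the entire history". A minimal sketch of a rolling 36-month window, assuming daily bars and roughly 21 trading days per month (that conversion factor is an assumption, not something from the original post):

# Assumption: daily bars, ~21 trading days per month, so 36 months ~= 756 bars
lookback_bars = 36 * 21

pyopt_srb_pf = vbt.Portfolio.from_order_func(
    price,
    order_func_nb,
    pre_sim_func_nb=pre_sim_func_nb,
    pre_sim_args=(63,),
    pre_segment_func_nb=pre_segment_func_nb.py_func,
    # history_len is the second element: the number of bars handed to the optimizer
    pre_segment_args=(pyopt_find_weights, lookback_bars, ann_factor, num_tests, pyopt_srb_sharpe),
    cash_sharing=True,
    group_by=True,
    use_numba=False
)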
I'm trying to solve the following MINLP, basically attempting to maximize the likelihood of a certain portfolio reaching a "ceiling" performance. My first attempt at the code is below.
EDIT: Math says maximize, should say minimize
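For reference, here is the model the code below appears to encode, reconstructed from the code rather than from the original math:

$$\min_{x \in \{0,1\}^{n}} \; z \quad \text{s.t.} \quad z \;\ge\; \frac{t - \sum_i \mu_i x_i}{\sqrt{\sum_i \sigma_i^2 x_i + \sum_{i,j} 2\,\Sigma_{ij}\, x_i x_j}}, \qquad \sum_i x_i = 15, \qquad \sum_i c_i x_i \le 600000,$$

where $\mu_i$, $\sigma_i$, and $c_i$ come from the 'Mean', 'stdev', and 'cost' columns and $\Sigma$ is cov_mat. Under a normality assumption, minimizing this z-score is equivalent to maximizing the probability that the portfolio total reaches the ceiling $t$, which is why the EDIT changes "maximize" to "minimize".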
from pyscipopt import Model, quicksum
import numpy as np
import pandas as pd
from random import uniform, normalvariate
model=Model()
t=20000
stocks_portfolio = {}
stocks_df = pd.DataFrame(np.zeros((150, 4)), columns=['ids', 'Mean', 'cost', 'stdev'])  # a list, not a set: column order matters
noptions = len(stocks_df)
stocks_df['ids'] = [i for i in range(noptions)]
stocks_df['Mean'] = [uniform(500,2500) for i in range(noptions)]
stocks_df['cost'] = [stocks_df.loc[i,'Mean']*uniform(50,250) for i in range(noptions)]
stocks_df['stdev'] = [stocks_df.loc[i,'Mean']*uniform(0.2,0.5) for i in range(noptions)]
cov_mat = np.array([[normalvariate(0,0.3) for i in range(noptions)] for j in range(noptions)])
for i in range(len(stocks_df)):
    stocks_portfolio[i] = model.addVar(vtype='B')
model.addCons(quicksum(stocks_portfolio[i] for i in range(noptions))==15)
model.addCons(quicksum(stocks_df.loc[i, 'cost']*stocks_portfolio[i] for i in range(noptions)) <= 600000)
stand_in = model.addVar(vtype='C')
expected_total = quicksum(stocks_df.loc[i, 'Mean'] * stocks_portfolio[i] for i in range(noptions))
variance = (quicksum(stocks_portfolio[i] * stocks_df.loc[i, 'stdev']**2 for i in range(noptions))
            + quicksum(2 * stocks_portfolio[i] * stocks_portfolio[j] * cov_mat[i, j]
                       for i in range(noptions) for j in range(noptions)))
model.addCons(stand_in >= (t - expected_total) / variance**0.5)
model.setObjective(stand_in,'minimize')
model.optimize()
model.getCondition()
portfolios = []
for i in range(noptions):
    if model.getVal(stocks_portfolio[i]) > 0.9:
        portfolios.append(i)
The performance here has been slow and unwieldy, and I was wondering if I'm thinking about the question all wrong.
I have a formula that is used to predict athletic performance based on daily stress.
It is based on five constants unique to each person. I'm trying to find these from the daily stress values and the performance testing that has been done. I'm new to programming and I don't know where to start.
See the formula:
Performance(t) = k1 * Fitness(t) - k2 * Fatigue(t) + P0
Fitness(t) = Fitness(t-1) * exp(-1/t1) + stress(t)
Fatigue(t) = Fatigue(t-1) * exp(-1/t2) + stress(t)
This is a sample of the data: data
Thank you.
import pandas as pd
import numpy as np
import math
from scipy import optimize
data = pd.read_csv('data_mod1.csv')
TSS = data['stress'].fillna(0)
arr = np.array(TSS)
#data = data.dropna()
def Banister(x, t1, t2, k1, k2, c):
    # reset fitness/fatigue on every call so repeated curve_fit evaluations
    # do not accumulate state from previous parameter guesses
    a = [x[0]]
    b = [x[0]]
    for v in x[1:]:
        a.append(a[-1] * np.exp(-1 / t1) + v)
        b.append(b[-1] * np.exp(-1 / t2) + v)
    data['fit'] = pd.Series(a)
    data['fat'] = pd.Series(b)
    data['perf'] = ((data['fit'] * k1) - (data['fat'] * k2)) + c
    return data['perf']
from scipy.optimize import curve_fit
fit = curve_fit(Banister, arr, data[data.index], p0=[20, 10, 1, 2, 50])
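As a starting point, here is a minimal sketch of how the fit could be wired up so that curve_fit only compares the model against days on which a test was actually done. It builds on arr and data from the code above and assumes the CSV has a column with the measured test results; 'performance' is a hypothetical column name, assumed to be NaN on days without a test:

# 'performance' is a hypothetical column name for the measured test results
test_days = data['performance'].notna().to_numpy()

def banister_pred(stress, t1, t2, k1, k2, p0):
    # rebuild fitness/fatigue from scratch for the given parameters
    fit_arr = np.zeros(len(stress))
    fat_arr = np.zeros(len(stress))
    for i in range(1, len(stress)):
        fit_arr[i] = fit_arr[i - 1] * np.exp(-1 / t1) + stress[i]
        fat_arr[i] = fat_arr[i - 1] * np.exp(-1 / t2) + stress[i]
    return k1 * fit_arr - k2 * fat_arr + p0

def model_on_test_days(stress, t1, t2, k1, k2, p0):
    # curve_fit compares this output against the measured values passed below
    return banister_pred(stress, t1, t2, k1, k2, p0)[test_days]

params, params_cov = curve_fit(
    model_on_test_days,
    arr,                                             # xdata: the full daily stress series
    data.loc[test_days, 'performance'].to_numpy(),   # ydata: measured performance on test days
    p0=[20, 10, 1, 2, 50],
)
t1, t2, k1, k2, p0_hat = params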
This feels like it should be a simple problem, but I am newer to Python; in R I would use a foreach loop that gives me the option to combine.
I have tried a for loop that lets me print out all the values I need, but I want them collected into a vector of values that I can use later.
from scipy.stats import gamma
import scipy.stats as stats
import numpy as np
import random
data2 = np.random.gamma(1,2, size = 500)
gammT = np.log(data2 + 1)
mean = np.mean(gammT)
sd = np.std(gammT)
a = (mean/ sd)**2
b = (sd**2)/ mean
for i in range(1, 100):
    gammT = random.sample(list(gammT), 500)
    gamm = np.random.gamma(a, b, size=len(gammT))
    s = stats.anderson_ksamp([gammT, gamm])
    s = s[2]
    print(s)
So I am able to print all the values I want, but I want them gathered together in a vector of values. I have tried to append and make lists but have not been able to get them together.
from scipy.stats import gamma
import scipy.stats as stats
import numpy as np
import random
gammT = np.log(data2.iScore + 1)
mean = np.mean(gammT)
sd = np.std(gammT)
a = (mean/ sd)**2
b = (sd**2)/ mean
#initialize empty list
result=[]
for i in range(100):
    # removed (1,100); you only need range(100) for 100 elements
    gammT = random.sample(list(gammT), 500)
    gamm = np.random.gamma(a, b, size=len(gammT))
    s = stats.anderson_ksamp([gammT, gamm])
    s = s[2]
    # append each calculated value to the list
    result.append(s)
    print(s)
print(result)
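If a NumPy vector rather than a Python list is needed afterwards, the collected values can be converted once the loop finishes:

result_vec = np.array(result)  # 1-D array holding all 100 significance levels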
I have data whose length is over 3000 rows.
Below is the code for computing a 20-day value (the Volume Ratio in the stock market).
It takes more than 2 minutes to run.
Is there a good way to reduce the running time?
import pandas as pd
import numpy as np
from pandas.io.data import DataReader
import matplotlib.pylab as plt
data = DataReader('047040.KS','yahoo',start='2010')
data['vr']=0
data['Volume Ratio']=0
data['acend']=0
data['vr'] = np.sign(data['Close']-data['Open'])
data['vr'] = np.where(data['vr']==0,0.5,data['vr'])
data['vr'] = np.where(data['vr']<0,0,data['vr'])
data['acend'] = np.multiply(data['Volume'],data['vr'])
for i in range(len(data['Open'])):
    if i < 19:
        data['Volume Ratio'][i] = 0
    else:
        data['Volume Ratio'][i] = (sum(data['acend'][i-19:i]) / (sum(data['Volume'][i-19:i]) - sum(data['acend'][i-19:i]))) * 100
Consider using conditional row selection and rolling.sum():
data.loc[data.index[:20], 'Volume Ratio'] = 0
data.loc[data.index[20:], 'Volume Ratio'] = (
    data['acend'].rolling(window=20).sum()
    / (data['Volume'].rolling(window=20).sum() - data['acend'].rolling(window=20).sum())
    * 100
)
or, simplified - .rolling(window=20).sum() produces NaN until the window is full, so just use .fillna(0):
data['new_col'] = (
    data['acend'].rolling(window=20).sum()
    .div(data['Volume'].rolling(window=20).sum()
         .subtract(data['acend'].rolling(window=20).sum()))
    .mul(100)
    .fillna(0)
)