I am trying to create a plotyly chart with some subplots based on Use button to filter different data in plotly python
The chart generation function takes as input a stock symbol, dict of periods (m1, m3, m5... for different minutes) and stock specific period dataframes.
I am trying the put the periods as buttons, so that on a period button click, the corresponding dataframe (OHLC) can be loaded along with period dependent indicators - MACD, RSI and ADX.
The issue is only the last period df is loaded and the buttons are not showing/ loading the period specific OHLCV.
Below is the function
def plot_plotly_v3(in_stock, in_period_stock_mdf_df_dict,n):
f_stock = in_stock
f_period_stock_mdf_df_dict = in_period_stock_mdf_df_dict
period_buttons = []
i = 0
period_length = len(f_period_stock_mdf_df_dict) # to calculate visible args
period_frequency_dict = config.c_g_period_python_freq_dict # broker period to python period
for period, stock_period_df in f_period_stock_mdf_df_dict.items():
stock_period_df.index = stock_period_df.index.droplevel([1, 2])
fig = make_subplots(rows=4, cols=1, shared_xaxes=True, vertical_spacing=0.007, row_heights=[.35, .20, .20, .25],
subplot_titles=('', 'MACD', 'RSI', 'ADX'))
# removing all empty dates and build complete timeline from start date to end date
py_frequency = period_frequency_dict.get(period) # broker period to python period mapping
dt_all = pd.date_range(start=stock_period_df.index[0], end=stock_period_df.index[-1], freq=py_frequency)
# retrieve the dates that ARE in the original datset
dt_obs = [d.strftime("%Y-%m-%d %H:%M:%S") for d in pd.to_datetime(stock_period_df.index)]
# define dates with missing values
dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d %H:%M:%S").tolist() if not d in dt_obs]
in_period_int = int(config.g_period_2_period_int_dict.get(period))
dvalue_ms = in_period_int * 60 * 1000
fig.update_xaxes(rangebreaks=[dict(values=dt_breaks, dvalue=dvalue_ms)])
fig_title = in_stock + ' for period ' + period + ' for range ' + 'From: ' + \
str(stock_period_df.index[0]) + ' To: ' + str(stock_period_df.index[-1])
for annotation in fig['layout']['annotations']:
annotation['textangle'] = -90
fig.update_annotations(xshift=-620, yshift=-100)
# Plot OHLC and MAs on 1st subplot
# Plot MACD trace on 2nd row
# Plot RSI trace on 3rd row
# Plot ADX trace on 4th row
# create visible args - True/ False list depending upon period/df dict
visible_args = create_true_false_list_v0(i, period_length)
# create a button object for the period we are on
button = dict(label=period, method="update", args=[{"visible": visible_args}])
# add the button to our list of buttons
period_buttons.append(button)
# i is an iterable used to tell our "args" list which value to set to True
i += 1
fig.update_layout(updatemenus=[dict(type="buttons",direction="right", x = 1,y = 1,buttons = period_buttons)],
height=800, width=1350, title=fig_title, xaxis_rangeslider_visible=False)
fig.show()
Would appreciate any support/ guidance.
Question:
1] Is it possible to display the title in the same level as toolbar/modebar.
2] Is it possible to display the period buttons in the same level as the toolbar.
Related
My target is to create some AreaChart3D plots in an automatically way.
Precisely, for example I have the following picture:
This table is automatically outputed by a tool.
I can have only one graph, maybe 2 graphs or even 100 graphs (does not matter so much), it is important every time I will have this kind of behavior with Location, Speed, and some times inside.
Now, I would like to have in the second sheet(ws2_obj) 4 graphs or maybe 2 graphs depends how many graphs will be outputed by the tool.
If I would have had a fixed number of graph it would have been easier.
Because this graphs are not fixed i have to cover the entire sheet and I do not know how to do it.
Also, there is another question: how to handle Depth (% of base) using Python?
from openpyxl.chart import (
AreaChart3D,
Reference,
)
wb_obj = xl.load_workbook('Plots.xlsx')
ws_obj = wb_obj.active
ws2_obj = wb_obj.create_sheet("Graphs")
c1 = AreaChart3D()
c1.legend = None
c1.style = 15
cats = Reference(ws_obj, min_col=1, min_row=7, max_row=200)
data = Reference(ws_obj, min_col=2, min_row=6, max_col=8, max_row=200)
c1.add_data(data, titles_from_data=True)
c1.set_categories(cats)
ws2_obj.add_chart(c1, "A1")
wb_obj.save("Plots.xlsx")
The Code above produces only one graph, but how should I proceed to create 2 or 4 or 100 graphs?
Later edit 1:
I tried something like this and it is almost working:
for i in range(1, 4):
c1 = AreaChart3D()
cats = Reference(ws_obj, min_col=1, min_row=7, max_row=200)
data = Reference(ws_obj, min_col=2, min_row=6, max_col=i * int(step), max_row=200)
c1.title = ws_obj.cell(row=1, column=i * int(step)).value
c1.legend = None
c1.style = 15
c1.y_axis.title = 'Fire Time'
c1.x_axis.title = 'Temperature'
c1.z_axis.title = "Velocity"
c1.add_data(data, titles_from_data=True)
c1.set_categories(cats)
ws2_obj.add_chart(c1, "A2")
For me the last ws2_obj.add_chart(c1, "A2") seems to be the problematic one.
Instead of A2 I would like to use something like ws2_obj.add_chart(c1, cell(row=2, column=i)).value but does not working.
Later Edit 2
I have observed if you want to add a chart to a certain cell, you have to use something like: ws2_obj.add_chart(my_chart, "R2")
In order to use the for loop I tried to find out a way to get this value R2.
Please, see below:
my_cells = []
for i in range(1, 4):
my_cell = ws2_obj.cell(row=1, column=i * int(step) - (int(step) - 1))
my_cells.append(my_cell)
print("My_Cell:", my_cells)
new_cells = []
for i in my_cells:
new_cells.append(re.findall("\W\w\d", str(i)))
new_new_cells = []
for i in new_cells:
new_new_cells.append(i[0])
print("new_new_cells:", new_new_cells)
final_list = [re.sub('[^a-zA-Z0-9]+', '', _) for _ in new_new_cells]
print("final list:", final_list)
And the output will be ['A1', 'H1', 'O1']
and then I can output the graph:
for i in range(1, 4):
c1 = AreaChart3D()
# my_cell = ws2_obj.cell(row=i, column=i * int(step))
cats = Reference(ws_obj, min_col=1, min_row=7, max_row=255)
data = Reference(ws_obj, min_col=2, min_row=6, max_col=i * int(step), max_row=255)
c1.title = ws_obj.cell(row=1, column=i * int(step)).value
c1.legend = None
c1.style = 20
c1.y_axis.title = 'Time'
c1.x_axis.title = 'Location'
c1.z_axis.title = "Velocity"
c1.add_data(data, titles_from_data=True)
c1.set_categories(cats)
c1.x_axis.scaling.max = 75
c1.y_axis.scaling.max = 50
c1.z_axis.scaling.max = 25
ws2_obj.add_chart(c1, str(final_list[i - 1]))
You can create a list of the series data (position where the data series starts). The list has 1 element per series. Iterate the list creating a chart for each and ensure you have some means to place the chart in a unique position.
Example code with comments below.
import openpyxl as xl
from openpyxl.chart import (
AreaChart3D,
Reference,
)
def create_chart(tl, maxr, hdr, x_ax):
"""
Creates a standard Area 3D Chart
"""
cht = AreaChart3D()
cht.legend = None
cht.style = 15
cht.title = hdr + " Chart"
cht.x_axis.title = x_ax
cht.y_axis.title = 'Something' # Some text for the y axis
data = Reference(ws_obj, min_col=tl[0], min_row=tl[1], max_col=tl[0]+1, max_row=maxr-1)
cht.add_data(data, titles_from_data=True)
return cht
## Sheet constants
chart_header = 'Speed' # It is assumed this is located in a merged cell
x_axis_header = 'Location'
series_topleft_header = 25
## Load Workbook and Sheet of Excel with data series
wb_obj = xl.load_workbook('Plots.xlsx')
ws_obj = wb_obj.active
## Get the total used rows in the sheet (end of the series table)
maxrows = ws_obj.max_row
speed_row = ''
speed_col_start = ''
speed_col_end = ''
speed_col_letter = ''
## Get a list of Merged cell in the sheet these contain the Headers for position referencing
merge_list = [m.coord for m in ws_obj.merged_cells.ranges]
## Search for the row with Header name 'Speed' to use as reference for series data postioning
for merge_element in ws_obj.merged_cells:
merge_cell_val = merge_element.start_cell.internal_value
if merge_cell_val.lower() == chart_header.lower():
speed_row = merge_element.max_row
speed_col_start = merge_element.min_col
speed_col_end = merge_element.max_col
speed_col_letter = merge_element.start_cell.column_letter
series_header_row = speed_row + 1
series1_start = speed_col_letter + str(series_header_row+1)
"""
Obtain the location of the top left cell where the series data exists
This searches the row below the header (containing the text 'Speed') for the first
series header (i.e. 25 in the example) and adds each position to the series_postion_list
"""
series_position_list = []
for row in ws_obj.iter_rows(min_row=series_header_row,
max_row=series_header_row,
min_col=speed_col_start,
max_col=speed_col_end):
for cell in row:
if cell.value == series_topleft_header:
series_position_list.append([cell.column, series_header_row])
## Create the Charts
"""
With the series_position_list indicating the top left cell of the series data
and the number of rows in the series determined be the maxrows - 1. This data
can be passed to the create_chart function to create the chart.
Charts are placed below the series data table from Column A with two charts
per row. First row for chart location is 2 rows below the series table.
"""
chart_start_row = maxrows + 2
chart_col = 'A'
"""
The series_position_list is used to create 1 chart per series
The chart creation function takes the top left coordinate and max rows along
with Chart header name and x axis header name
"""
for enum, top_left in enumerate(series_position_list, 1):
chart_obj = create_chart(top_left,
maxrows,
chart_header + ' ' + str(enum),
x_axis_header)
## This sets the position the chart will be placed. Based on standard size
## of plot area the charts are 16 rows and 10 columns apart
if enum == 1:
pass
elif enum % 2 == 1:
chart_col = 'A'
chart_start_row += 16
else:
chart_col = 'J'
## Adds chart to the Excel sheet
print(f"Adding chart {chart_header + ' ' + str(enum)} to Excel:")
print(f"Series Data Start; Row:{str(top_left[1]+1)} Column:{top_left[0]}")
ws_obj.add_chart(chart_obj, chart_col + str(chart_start_row))
print("--------------\n")
wb_obj.save("Plots.xlsx")
-----------------Additional Information--------------
add_chart is a method that accepts two arguments; the chart object and optionally an anchor point (i.e the top left cell where the chart is placed in the sheet). Use of .value at the end of
ws2_obj.add_chart(c1, cell(row=2, column=i)).value
is invalid as you are not entering the method into the cell you are using the method to add the chart object c1 at position cell(row=2, column=i). Using cell(row=2, column=i) is also an invalid syntax. You may have meant to use ws2_obj.cell(row=2, column=i) as the anchor. This would be accepted by the add_chart method however when saving the worksheet there would be an error on checking the anchor point as this expects the anchor to be an "Excel style coordinate" i.e. a string like 'A2' rather than a cell object like ws2_obj.cell(row=2, column=i). Even using (2, 1) would fail the same check.
To set the anchor points I will show how to do two options; All charts on the same row and X charts across the row then start next X charts on the next row etc.
Place all charts on same row;
If you are going to put all charts on the same row then the row coord will not change and only the column position needs adjustment for each chart.
You can generate the anchor points like below, the example code uses a for loop with 18 elements;
from openpyxl.utils.cell import coordinate_to_tuple
from openpyxl.utils import get_column_letter
anchor = 'A2' # Position of anchor, first anchor point is 'A2'
column_separation = 9 # Number of columns to separate each chart
for i in range(0, 18):
coord_tuple = coordinate_to_tuple(anchor)
row = coord_tuple[0]
col_offset = column_separation if i > 0 else 0
col_new = get_column_letter(coord_tuple[1] + col_offset)
anchor = f'{col_new}{row}'
print(f'Adding chart at Anchor point {anchor}')
ws2_obj.add_chart(c1, anchor)
This will put the chart at the following achor points;
A2, J2, S2, AB2, AK2, AT2, BC2, BL2, BU2, CD2, CM2, CV2, DE2, DN2, DW2, EF2, EX2, EO2
Placing the charts is a pattern.
Placing the charts is a pattern of rows and columns is similar to the previous code however when the number of charts reaches your limit the 'row' value has to change and the column resets back to 'A'.
The example code again uses a for loop with 18 elements and splits the charts into rows of max_chart_row, set to 5 in this case;
from openpyxl.utils.cell import coordinate_to_tuple
from openpyxl.utils import get_column_letter
anchor = 'A2'
column_separation = 9
max_chart_row = 5
for i in range(0, 18):
coord_tuple = coordinate_to_tuple(anchor)
row = coord_tuple[0]
col_offset = column_separation if i > 0 else 0
# When the number of charts across the row is reached, set the row to 16 more than the current
# and reset the column offset to 0
if i % (max_chart_row) == 0 and i != 0:
row = row + 16
col_offset = 0
col_new = get_column_letter(col_offset+1)
else:
col_new = get_column_letter(coord_tuple[1] + col_offset)
anchor = f'{col_new}{row}'
print(f'Adding chart at Anchor point {anchor}')
ws2_obj.add_chart(c1, anchor)
This will put the chart at the following achor points;
A2, J2, S2, AB2, AK2,
A18, J18, S18, AB18, AK18,
A34, J34, S34, AB34, AK34,
A50, J50, S50
Hello need some help with this problem
a = pd.date_range(start="2001-01-01", freq="T", periods=520000)
This creates the date-range i need for 1 year. I want to do the same for the next 80 years. The end result should be a date range for 80year but every year ends after 520000min. Then i add the date range to my dataset.
# this is the data
ALL_Data = pd.DataFrame({"Lebensverbrauch_Min": LebensverbrauchMIN,
"HPT": Heisspunkttemperatur_Sim1,
"Innentemperatur": StartS,
"Verlustleistung": V_Leistung,
"SolarEintrag": SolarEintrag,
"Lastfaktor": K_Load_Faktor
})
# How many minutes are left in the year
DatenJahr = len(pd.date_range(start=str(xx) + "-01-01", freq="T", periods=520000))
VollesJahr = len(pd.date_range(start=str(xx) + "-01-01", freq="T", end=str(xx + 1) + "-01-01"))
GG = (VollesJahr - DatenJahr)
d = pd.DataFrame(np.zeros((GG, 6)), columns=['Lebensverbrauch_Min', 'HPT', 'Innentemperatur','Verlustleistung',
'SolarEintrag', 'Lastfaktor',])
#combine Data with 0
ALL_Data = pd.concat([ALL_Data, d])
seems to work but the complete code needs 4h to run so we will see
so I am writing a program that monitors and records your usage time of foreground applications and saves them in a SQL database. I then want to retrieve the data from previous days and compile it all together into a stacked bar chart. Here, the x-axis will have the different days over which usage was recorded, and the various stacks in each bar will represent each app that was used.
In my program, I created 2 tables, one to record each day's app usage (with each new day's data having a different primary key id), and another table to record the primary key for each day.
Table 1:
_id
Application
usage_time
0
Google Chrome
245.283942928347
1
PyCharm
450.3939754962921
1
SQLiteStudio
140.2376308441162
1
Google Chrome
5.008131980896
Table 2:
Date
daily_id
2021-07-18 07:25:25.376734
0
2021-07-18 07:27:57.419574
1
Within my stacked bar chart program, I have come up with this code to refine the data to put into the stacked bar chart:
conn = sqlite3.connect('daily_usage_monitor.sqlite', detect_types=sqlite3.PARSE_DECLTYPES)
all_app_data = conn.execute('SELECT all_usage_information.date, monitor.application, monitor.usage_time FROM all_usage_information INNER JOIN monitor ON all_usage_information.daily_id = monitor._id ORDER BY all_usage_information.date, monitor.usage_time ASC').fetchall()
for date, app, usage_time in all_app_data:
print(f'{date} - {app}: {usage_time}')
conn.close()
daily_data = {}
# Create nested dictionary - key = each date, value = dictionary of different apps & their time usage durations
for date, app, time in all_app_data:
conditions = [date not in daily_data, app != 'loginwindow']
if all(conditions):
daily_data[date] = {app: time}
elif not conditions[0] and conditions[1]:
daily_data[date].update({app: time})
print(daily_data) # TODO: REMOVE AFTER TESTING
total_time = 0
# Club any applications that account for <5% of total time into 1 category called 'Other'
for date, app_usages in daily_data.items():
total_time = sum(time for app, time in app_usages.items())
refined_data = {}
for key, value in app_usages.items():
if value/total_time < 0.05:
refined_data['Others'] = refined_data.setdefault('Others', 0) + value
else:
refined_data[key] = value
daily_data[date] = refined_data
print(daily_data) # TODO: REMOVE AFTER TESTING
# Takes the nested dictionary and breaks it into a labels list and a dictionary with apps & time usages for each day
# Sorts data so it can be used to create composite bar chart
final_data = {}
labels = []
for date, app_usages in daily_data.items():
labels.append(date)
for app, time in app_usages.items():
if app not in final_data:
final_data[app] = [time]
else:
final_data[app].append(time)
This is the kind of output I am currently getting:
{'Google Chrome': [245.283942928347, 190.20031905174255], 'SQLiteStudio': [145.24058270454407], 'PyCharm': [1166.0021023750305]}
The problem here is that for the days where an application had 0 usage time, it is not being recorded in the list. Therefore, the insertion order of the stacked bar chart will not be correct and will show the wrong apps for the wrong dates. How can I fix this?
This is one method I tried, but of course it's not working because you cannot index into a dictionary:
for app, usage in final_data.items():
for date, app_usages in daily_data.items():
if app not in app_usages:
usage.insert(app_usages.index(app), 0)
I'm trying to write a trading algo and I am very new to python.
Lots of things are easy to understand but I get lost easily. I have a strategy I want to use, but the coding is getting in the way.
I want to create two moving averages and when they cross I want that to be a signal.
The part im I am currently struggling with is also including information about the prior period.
When
MovingAverage1( last 10 candles ) == MovingAverage2( Last 20 candles ),
that's a signal,
but is it a buy or sell?
When
MovingAVerage1( last 10 candles after skipping most recent ) > MovingAverage2( last 10 candles after skipping most recent )
then sell.
Here is what I've got so far, where the MA-s I am using are being simplified for this question:
class MyMACrossStrategy (Strategy):
"""
Requires:
symbol - A stock symbol on which to form a strategy on.
bars - A DataFrame of bars for the above symbol.
short_window - Lookback period for short moving average.
long_window - Lookback period for long moving average."""
def __init__(self, symbol, bars, short_window=4, long_window=9):
self.symbol = symbol
self.bars = bars
self.short_window = short_window
self.long_window = long_window
# Function Helper for indicators
def fill_for_noncomputable_vals(input_data, result_data):
non_computable_values = np.repeat(
np.nan, len(input_data) - len(result_data)
)
filled_result_data = np.append(non_computable_values, result_data)
return filled_result_data
def simple_moving_average(data, period):
"""
Simple Moving Average.
Formula:
SUM(data / N)
"""
catch_errors.check_for_period_error(data, period)
# Mean of Empty Slice RuntimeWarning doesn't affect output so it is
# supressed
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=RuntimeWarning)
sma = [np.mean(data[idx-(period-1):idx+1]) for idx in range(0, len(data))]
sma = fill_for_noncomputable_vals(data, sma)
return sma
def hull_moving_average(data, period):
"""
Hull Moving Average.
Formula:
HMA = WMA(2*WMA(n/2) - WMA(n)), sqrt(n)
"""
catch_errors.check_for_period_error(data, period)
hma = wma(
2 * wma(data, int(period/2)) - wma(data, period), int(np.sqrt(period))
)
return hma
def generate_signals(self):
"""Returns the DataFrame of symbols containing the signals
to go long, short or hold (1, -1 or 0)."""
signals = pd.DataFrame(index=self.bars.index)
signals['signal'] = 0.0
# Create the set of moving averages over the
# respective periods
signals['Fast_Line'] = sma(bars['Close'], self.short_window)
signals['Slow_line'] = hma(bars['Close'], self.long_window)
signals1['Fast_Line'] = sma(bars['Close'], self.short_window[-1])
signals1['Slow_line'] = hma(bars['Close'], self.long_window[-1])
# Create a 'signal' (invested or not invested) when the short moving average crosses the long
# moving average, but only for the period greater than the shortest moving average window
signals['signal'][self.short_window:] = np.where(signals['Fast_Line'][self.short_window:]
> signals['Slow_line'][self.short_window:], 1.0, 0.0)
# Take the difference of the signals in order to generate actual trading orders
signals['positions'] = signals['signal'].diff()
if signals['Fast_Line'] = signals['Slow_Line'] and ...
return signals
Hopefully my question makes sense.
I am assuming that you want to test your strategy first before using it in live market. You can download the stock data from yahoo finance in csv format. And you can upload with below code:
import pandas as pd
import numpy as np
data = pd.read_csv('MSFT.csv')
once the data is stored in the pandas dataframe data, you can moving average of the Closing price with following code:
if you are planning the crossover strategy
sma_days=20
lma_days=50
data['SMA_20']=data['Close'].rolling(window=sma_days,center=False).mean()
data['SMA_50']=data['Close'].rolling(window=lma_days,center=False).mean()
data['SIGNAL']=np.where(data['SMA_20']>data['SMA_50'],'BUY','SELL')
I've setup a simulation example below.
Setup:
I have weekly data, say 6 years of data each week of around 1000 stocks some weeks more other weeks less than 1000. I randomly chose 75 stocks at time t0. At t1 some stocks dies (probability p, goes out of fashion) or leave the index (structural such as merging). I need to simulate stocks so that every week I've exactly 75 stocks. Every week some stocks dies (between 0 and 75) and I pick new ones not from the existing 75. I also check if the stock leaves do to structural reasons. Every week I calculate the returns of the 75 stocks.
Questions: Is there an obvious why to improve the speed. I started with Pandas objects (group sort) which was to slow. I haven't tried to parallel the loop. I'm more interesting to hear if I should use numba (but it doesn't have the np.in1d function) or if there is a faster way to shuffle (I actually only need to shuffle the ones). I've also think about creating a fixed array with all stocks id using NaN, the problem here is that I need 75 names so I still need to filter out these NaN every week.
Maybe this is to detailed problem for this forum, I apologize if that's the case
Code:
from timeit import default_timer
import numpy as np
# Create dataset
n_weeks = 312 # Approximately 6 years of weekly data
n_stocks = np.random.normal(1000, 5, n_weeks).astype(dtype=np.uint16) # Around 1000 stocks every week but not fixed
idx_new_week = np.cumsum(np.hstack((0, n_stocks)))
# We give each stock a stock idea
n_obs = n_stocks.sum()
stock_id = np.ones([n_obs], dtype=np.uint16)
for j in range(1, n_weeks+1):
stock_id[idx_new_week[j-1]:idx_new_week[j]] = np.cumsum(np.ones(n_stocks[j-1]))
stock_rtn = np.random.normal(0, 0.25/np.sqrt(52), n_obs) # Simulated forward (one week ahead) return for each stock
# Simulation part
# Week 0 pick randomly 75 stocks
# Week n >=1 a stock dies for two reasons
# 1) randomness (probability 'p')
# 2) structural event (could be merger, fall out of index).
# We cannot assume that it is always the high stockid which dies for structural reasons (as it looks like here)
# If a stock dies we randomely pick a stock from the "deak" stock dataset (not included the ones which dies this week)
n_sim = 100 # I want this to be 1 mill
n_stock_cand = 75 # For this example we pick 75 stocks
p_survial = 0.90
# The weekly periodcal returns
pf_rtn = np.zeros([n_weeks, n_sim])
start = default_timer()
for k in range(0, n_sim):
# Randomely choice n_stock_cand at time zero
boolean_list = np.array([False] * (n_stocks[0] - n_stock_cand) + [True] * n_stock_cand)
np.random.shuffle(boolean_list) # Shuffle the list
stock_id_this_week = stock_id[idx_new_week[0]:idx_new_week[1]][boolean_list]
stock_rtn_this_week = stock_rtn[idx_new_week[0]:idx_new_week[1]][boolean_list]
# This part only simulate the Buzz portfolio names - later we simulate returns and from specific holdings of the 75 names
for j in range(1, n_weeks):
pf_rtn[j-1, k] = stock_rtn_this_week.mean()
# Find the number of stocks to keep
boolean_keep_stocks = np.random.rand(n_stock_cand) < p_survial
# Next we need to check if a stock is still part of the universe next period
stock_cand_temp = stock_id[idx_new_week[j-1]:idx_new_week[j]]
stock_rtn_temp = stock_rtn[idx_new_week[j-1]:idx_new_week[j]]
boolean_keep_stocks = (boolean_keep_stocks) & (np.in1d(stock_id_this_week, stock_cand_temp, assume_unique=True))
n_stocks_to_replace = n_stock_cand - boolean_keep_stocks.sum() # Number of new stocks to pick this week
if n_stocks_to_replace > 0:
# We have to pick from stocks which is not part of the portfolio already
boolean_cand = np.in1d(stock_cand_temp, stock_id_this_week, assume_unique=True, invert=True)
n_stocks_to_pick_from = boolean_cand.sum()
boolean_list = np.array([False] * (n_stocks_to_pick_from - n_stocks_to_replace) + [True] * n_stocks_to_replace)
np.random.shuffle(boolean_list) # Shuffle the list
# First avoid picking the same stock twich, next pick from the unique candidate list
stock_id_new = stock_cand_temp[boolean_cand][boolean_list] # The new stocks
stock_rtn_new = stock_rtn_temp[boolean_cand][boolean_list] # and their returns
stock_id_this_week = np.hstack((stock_id_this_week[boolean_keep_stocks], stock_id_new))
stock_rtn_this_week = np.hstack((stock_rtn_this_week[boolean_keep_stocks], stock_rtn_new))
else:
# No replacement of stocks / all surview but order might differ
boolean_cand = np.in1d(stock_cand_temp, stock_id_this_week, assume_unique=True, invert=False)
stock_id_this_week = stock_cand_temp[boolean_cand]
stock_rtn_this_week = stock_rtn_temp[boolean_cand]
# PnL last period
pf_rtn[n_weeks-1, k] = stock_rtn_this_week.mean()
print(default_timer() - start)