Adding rolling optimization to vectorbt with an N-month lookback window - python

I am working on backtesting code for a maximum-Sharpe portfolio.
I am using PyPortfolioOpt instead of cvxpy to compute the weights; however, I am confused about where to configure the number of months for the lookback period of this optimization, e.g. a rolling 36-month optimization.
I believe the lookback window should be set here, but I am not quite sure how:
def pre_segment_func_nb(c, find_weights_nb, history_len, ann_factor, num_tests, srb_sharpe):
    if history_len == -1:
        # Look back at the entire time period
        close = c.close[:c.i, c.from_col:c.to_col]
    else:
        # Look back at a fixed time period
        if c.i - history_len <= 0:
            return (np.full(c.group_len, np.nan),)  # insufficient data
        close = c.close[c.i - history_len:c.i, c.from_col:c.to_col]
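My understanding is that history_len here is measured in bars, not months, so an N-month window has to be converted first. A minimal sketch of that conversion, assuming daily bars and roughly 21 trading days per month (both assumptions on my part):

TRADING_DAYS_PER_MONTH = 21  # assumption; adjust to your data's frequency

def months_to_bars(n_months):
    # Convert a lookback expressed in months into a lookback in daily bars
    return n_months * TRADING_DAYS_PER_MONTH

history_len = months_to_bars(36)  # 36-month rolling window ~ 756 daily bars

This value would then be passed in through pre_segment_args (see the sketch after the from_order_func call below).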
Here is the full code for your reference.
import os
import numpy as np
import pandas as pd
import yfinance as yf
from datetime import datetime
import pytz
from numba import njit
import matplotlib.pyplot as plt
import seaborn as sns
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt import base_optimizer
from pypfopt import objective_functions
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices
from pypfopt import EfficientSemivariance
from pypfopt.expected_returns import mean_historical_return
from pypfopt.expected_returns import returns_from_prices
import vectorbt as vbt
from vectorbt.generic.nb import nanmean_nb
from vectorbt.portfolio.nb import order_nb, sort_call_seq_nb
from vectorbt.portfolio.enums import SizeType, Direction
from IPython.display import set_matplotlib_formats
%matplotlib inline
%config InlineBackend.figure_format='retina'
symbols = ['NVDA','AMD','TSLA','NET','JPM','AAPL']
start_date = datetime(2012, 1, 1, tzinfo=pytz.utc)
end_date = datetime.today().strftime('%Y-%m-%d')
num_tests = 1000000
vbt.settings.array_wrapper['freq'] = 'days'
vbt.settings.returns['year_freq'] = '252 days'
vbt.settings.portfolio['seed'] = 42
vbt.settings.portfolio.stats['incl_unrealized'] = True
yfdata = vbt.YFData.download(symbols, start=start_date, end=end_date)
ohlcv = yfdata.concat()
price = ohlcv['Close'].fillna(method='ffill')
srb_sharpe = np.full(price.shape[0], np.nan)
@njit
def pre_sim_func_nb(c, every_nth):
    # Define rebalancing days
    c.segment_mask[:, :] = False
    c.segment_mask[every_nth::every_nth, :] = True
    return ()
@njit
def find_weights_nb(c, price, num_tests):
    # Find optimal weights based on best Sharpe ratio
    returns = (price[1:] - price[:-1]) / price[:-1]
    returns = returns[1:, :]  # cannot compute np.cov with NaN
    mean = nanmean_nb(returns)
    cov = np.cov(returns, rowvar=False)  # masked arrays not supported by Numba (yet)
    best_sharpe_ratio = -np.inf
    weights = np.full(c.group_len, np.nan, dtype=np.float_)
    for i in range(num_tests):
        # Generate random weights that sum to 1
        w = np.random.random_sample(c.group_len)
        w = w / np.sum(w)
        # Compute annualized mean, covariance, and Sharpe ratio
        p_return = np.sum(mean * w) * ann_factor
        p_std = np.sqrt(np.dot(w.T, np.dot(cov, w))) * np.sqrt(ann_factor)
        sharpe_ratio = p_return / p_std
        if sharpe_ratio > best_sharpe_ratio:
            best_sharpe_ratio = sharpe_ratio
            weights = w
    return best_sharpe_ratio, weights
@njit
def pre_segment_func_nb(c, find_weights_nb, history_len, ann_factor, num_tests, srb_sharpe):
    if history_len == -1:
        # Look back at the entire time period
        close = c.close[:c.i, c.from_col:c.to_col]
    else:
        # Look back at a fixed time period
        if c.i - history_len <= 0:
            return (np.full(c.group_len, np.nan),)  # insufficient data
        close = c.close[c.i - history_len:c.i, c.from_col:c.to_col]
    # Find optimal weights
    best_sharpe_ratio, weights = find_weights_nb(c, close, num_tests)
    srb_sharpe[c.i] = best_sharpe_ratio
    # Update valuation price and reorder orders
    size_type = SizeType.TargetPercent
    direction = Direction.LongOnly
    order_value_out = np.empty(c.group_len, dtype=np.float_)
    for k in range(c.group_len):
        col = c.from_col + k
        c.last_val_price[col] = c.close[c.i, col]
    sort_call_seq_nb(c, weights, size_type, direction, order_value_out)
    return (weights,)
@njit
def order_func_nb(c, weights):
    col_i = c.call_seq_now[c.call_idx]
    return order_nb(
        weights[col_i],
        c.close[c.i, c.col],
        size_type=SizeType.TargetPercent
    )
returns = price.pct_change()  # needed for the annualization factor below
ann_factor = returns.vbt.returns.ann_factor
def pyopt_find_weights(sc, price, num_tests):  # no @njit decorator = it's a pure Python function
    price = pd.DataFrame(price, columns=symbols)
    avg_returns = expected_returns.mean_historical_return(price)
    cov_mat = risk_models.CovarianceShrinkage(price).ledoit_wolf()
    ef = EfficientFrontier(avg_returns, cov_mat, weight_bounds=(0, 1))
    min_weight, max_weight = 0.05, 0.35
    constraints = [
        # {"type": "eq", "fun": lambda w: np.sum(w) - 1},  # sum to 1
        {"type": "ineq", "fun": lambda w: w - min_weight},  # greater than min_weight
        {"type": "ineq", "fun": lambda w: max_weight - w},  # less than max_weight
    ]
    weights = ef.nonconvex_objective(
        objective_functions.sharpe_ratio,
        objective_args=(avg_returns, cov_mat),
        weights_sum_to_one=True,
        constraints=constraints
    )
    clean_weights = ef.clean_weights()
    weights = np.array([clean_weights[symbol] for symbol in symbols])
    best_sharpe_ratio = base_optimizer.portfolio_performance(weights, avg_returns, cov_mat)[2]
    latest_prices = get_latest_prices(price)
    da = DiscreteAllocation(clean_weights, latest_prices, total_portfolio_value=25000)
    allocation, leftover = da.lp_portfolio(reinvest=True)
    w = pd.DataFrame(clean_weights, columns=clean_weights.keys(), index=[0])
    w.to_excel('w.xlsx')
    return best_sharpe_ratio, weights
pyopt_srb_sharpe = np.full(price.shape[0], np.nan)
pyopt_srb_pf = vbt.Portfolio.from_order_func(
    price,
    order_func_nb,
    pre_sim_func_nb=pre_sim_func_nb,
    pre_sim_args=(63,),  # 63 or 84
    pre_segment_func_nb=pre_segment_func_nb.py_func,  # run pre_segment_func_nb as pure Python function
    pre_segment_args=(pyopt_find_weights, -1, ann_factor, num_tests, pyopt_srb_sharpe),
    cash_sharing=True,
    group_by=True,
    use_numba=False  # run simulate_nb as pure Python function
)
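My best guess is that the lookback is the second element of pre_segment_args: -1 means look back over the entire history, and any positive integer is interpreted by pre_segment_func_nb as a number of bars. So a rolling 36-month optimization on daily data would look something like the sketch below (the 21-trading-days-per-month factor is my assumption). Is this the right place to set it?

history_len = 36 * 21  # ~36 months of daily bars (assuming ~21 trading days/month)

pyopt_srb_pf = vbt.Portfolio.from_order_func(
    price,
    order_func_nb,
    pre_sim_func_nb=pre_sim_func_nb,
    pre_sim_args=(63,),
    pre_segment_func_nb=pre_segment_func_nb.py_func,
    pre_segment_args=(pyopt_find_weights, history_len, ann_factor, num_tests, pyopt_srb_sharpe),
    cash_sharing=True,
    group_by=True,
    use_numba=False
)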
Any help is greatly appreciated.

Related

How to add constraints in Python when optimizing a portfolio?

I am new to Python and do not have a lot of experience.
I am trying to add constraints to this code so that the weight of a stock cannot be equal to 0, but the weight of the same stock cannot be above 5% either. This constraint should hold for all the stocks.
This is what I have so far; does anyone have any tips on how to add these constraints?
Thanks in advance!
import os
import pandas as pd
import numpy as np
from scipy.optimize import linprog
data = pd.read_excel("data.xlsm")
# change data['WGT_GLOBAL'] s.t. EUTax = 20
data['Weights screened'] = data['WGT_GLOBAL']*data['Positiv screening']
EUTax = (data['Weights screened']*data['EU tax']).sum()
# min = -(x*data['YTD Return']).sum()
# s.t. x >= 0, x <= 1, (x*data['Positiv screening']*data['EU tax']).sum() = 20
obj = -(data['YTD Return'].fillna(0).to_numpy())
bnd = [(0,1)]
lhs_eq = [data['Positiv screening']*data['EU tax'].to_numpy(),np.ones(len(data))]
rhs_eq = [[20],[1]]
opt = linprog(c=obj, A_eq=lhs_eq, b_eq=rhs_eq, bounds=bnd, method="revised simplex")
optimal_weights = opt.x
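One idea would be to express both limits through linprog's bounds argument, as in the sketch below. Since an LP can't encode a strict "not equal to 0", a small epsilon lower bound stands in for it; the epsilon value here is an arbitrary assumption:

# One (min, max) pair per decision variable: at least eps (so never exactly 0),
# at most 5% per stock.
eps = 1e-6  # arbitrary small lower bound standing in for "weight > 0"
bnd = [(eps, 0.05) for _ in range(len(data))]

opt = linprog(c=obj, A_eq=lhs_eq, b_eq=rhs_eq, bounds=bnd, method="revised simplex")
optimal_weights = opt.x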

Getting a "ValueError: zero-size array to reduction operation minimum which has no identity" when using metpy cape_cin function

I'm trying to calculate CAPE and CIN in a specified layer using MetPy's cape_cin and get_layer functions. I'm doing this for a RAP vertical profile that I access from the NCDC server, for 3 different parcel types (ML, MU, and SB). I get the error ValueError: zero-size array to reduction operation minimum which has no identity when I try to compute the MU and SB profiles, but not for the ML profile, even though I calculate the layers in the exact same way:
First, I have to create vertical profiles of pressure, temp, dewpoint, and heights, from the RAP vertical profile:
from datetime import datetime, timedelta
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS
import numpy as np
from metpy.units import units
import cartopy.crs as ccrs
from metpy.calc import (dewpoint_from_relative_humidity, mixed_parcel, most_unstable_parcel, parcel_profile, pressure_to_height_std, lcl, height_to_pressure_std, get_layer, cape_cin)
year=2019
month=5
day=23
hour=0
cenlat = 34.91269
cenlon = -98.21048
time_start = datetime(year, month, day, hour, 0) #specified time
hour = time_start.hour
if hour < 10:
    hour = '0' + str(hour)
day = time_start.day
if day < 10:
    day = '0' + str(day)
month = time_start.month
if month < 10:
    month = '0' + str(month)
cat = TDSCatalog('https://www.ncdc.noaa.gov/thredds/catalog/model-rap130-old/'+str(time_start.year)+str(month)+'/'+str(time_start.year)+str(month)+str(day)+'/catalog.html?dataset=rap130-old/'+str(time_start.year)+str(month)+'/'+str(time_start.year)+str(month)+str(day)+'/rap_130_'+str(time_start.year)+str(month)+str(day)+'_'+str(hour)+'00_000.grb2')
latest_ds = list(cat.datasets.values())[0]
print(latest_ds.access_urls)
ncss = NCSS(latest_ds.access_urls['NetcdfSubset'])
query = ncss.query()
query.variables('Pressure_surface').variables('Geopotential_height_isobaric').variables('Geopotential_height_surface').variables('Relative_humidity_isobaric').variables('Temperature_isobaric').variables('Dewpoint_temperature_height_above_ground').variables('Temperature_height_above_ground')
query.add_lonlat().lonlat_box(cenlon-2.1, cenlon +2.1, cenlat-2.1, cenlat+2.1)
data1 = ncss.get_data(query)
dlev = data1.variables['Geopotential_height_isobaric'].dimensions[1]
dlat = data1.variables['Geopotential_height_isobaric'].dimensions[2]
dlon = data1.variables['Geopotential_height_isobaric'].dimensions[3]
SFCP = (np.asarray(data1.variables['Pressure_surface'][:])/100.) * units('hPa')
hgt = np.asarray(data1.variables['Geopotential_height_isobaric'][:]) * units('meter')
sfc_hgt = np.asarray(data1.variables['Geopotential_height_surface'][:]) * units('meter')
Temp_up = np.asarray(data1.variables['Temperature_isobaric'][:]) * units('kelvin')
RH_up = np.asarray(data1.variables['Relative_humidity_isobaric'][:])
Td = (np.asarray(data1.variables['Dewpoint_temperature_height_above_ground'][:]) * units('kelvin')).to('degC')
T = np.asarray(data1.variables['Temperature_height_above_ground'][:]) * units('kelvin')
# Get the dimension data
lats_r = data1.variables[dlat][:]
lons_r= data1.variables[dlon][:]
lev = (np.asarray(data1.variables[dlev][:])/100.) * units('hPa')
# Set up our array of latitude and longitude values and transform to the desired projection.
flon = float(cenlon)
flat = float(cenlat)
crs = ccrs.PlateCarree()
crlons, crlats = np.meshgrid(lons_r[:]*1000, lats_r[:]*1000)
trlatlons = crs.transform_points(ccrs.LambertConformal(central_longitude=265, central_latitude=25, standard_parallels=(25.,25.)),crlons,crlats)
trlons = trlatlons[:,:,0]
trlats = trlatlons[:,:,1]
dlon = np.abs(trlons - cenlon)
dlat = np.abs(trlats - cenlat)
ilon = np.where(dlon == np.min(dlon)) #position in the dlon array with minimal difference between gridpoint lon and input lon
ilat = np.where(dlat == np.min(dlat)) #position in the dlat array with minimal difference between gridpoint lat and input lat
Tdc_up = dewpoint_from_relative_humidity(Temp_up[0,:,ilat[0][0], ilon[1][0]],RH_up[0,:,ilat[0][0], ilon[1][0]]/100)
p_sounding = np.sort(np.append(lev, SFCP[0,ilat[0][0], ilon[1][0]]))
ind = np.where(p_sounding >= SFCP[0,ilat[0][0], ilon[1][0]])[0][0]
hgt_sounding = np.insert(hgt[0,:,ilat[0][0], ilon[1][0]].magnitude, ind, sfc_hgt[0,ilat[0][0], ilon[1][0]].magnitude) * hgt.units
T_sounding = (np.insert(Temp_up[0,:,ilat[0][0], ilon[1][0]].magnitude, ind, T[0,0,ilat[0][0], ilon[1][0]].magnitude) * T.units).to(Tdc_up.units)
Td_sounding = np.insert(Tdc_up.magnitude, ind, Td[0,0,ilat[0][0], ilon[1][0]].magnitude) * Tdc_up.units
p_skewt = p_sounding[p_sounding <= SFCP[0,ilat[0][0], ilon[1][0]]]
hgt_skewt = hgt_sounding[p_sounding <= SFCP[0,ilat[0][0], ilon[1][0]]]
T_skewt = T_sounding[p_sounding <= SFCP[0,ilat[0][0], ilon[1][0]]]
Td_skewt = Td_sounding[p_sounding <= SFCP[0,ilat[0][0], ilon[1][0]]]
AGLhgts = hgt_skewt[::-1]-hgt_skewt[-1]
Next, I create vertical profiles for each parcel type using the mixed_parcel, most_unstable_parcel, and parcel_profile functions, and compute the pressure values for the top and bottom of the layer I want to calculate cape_cin for (the LCL to LCL+2km):
ml_p, ml_T, ml_Td = mixed_parcel(np.flip(p_skewt), np.flip(T_skewt), np.flip(Td_skewt))
ml_profile = parcel_profile(p_skewt[::-1], ml_T, ml_Td)
ml_profile = (ml_profile - 273.15*units('kelvin')).magnitude*units('degC')
mu_p, mu_T, mu_Td, mu_index = most_unstable_parcel(np.flip(p_skewt), np.flip(T_skewt), np.flip(Td_skewt))
mu_profile = parcel_profile(p_skewt[::-1], mu_T, mu_Td)
mu_profile = (mu_profile - 273.15*units('kelvin')).magnitude*units('degC')
#Note: sbpcl_profile will have the exact same values of p_skewt, T_skewt, and Td_skewt in pprof below:
pprof = parcel_profile(p_skewt[::-1], T_skewt[-1], Td_skewt[-1])
pprof = (pprof - 273.15*units('kelvin')).magnitude*units('degC')
mllcl = lcl(ml_p, ml_T, ml_Td)
mllcl_h = pressure_to_height_std(mllcl[0]) - hgt_skewt[-1]
mulcl = lcl(mu_p, mu_T, mu_Td)
mulcl_h = pressure_to_height_std(mulcl[0]) - hgt_skewt[-1]
sblcl = lcl(p_skewt[-1], T_skewt[-1], Td_skewt[-1])
sblcl_h = pressure_to_height_std(sblcl[0]) - hgt_skewt[-1]
mllcl2000 = mllcl_h + 2*units('kilometer')
mulcl2000 = mulcl_h + 2*units('kilometer')
sblcl2000 = sblcl_h + 2*units('kilometer')
mllcl2000_p = height_to_pressure_std(mllcl2000)
mulcl2000_p = height_to_pressure_std(mulcl2000)
sblcl2000_p = height_to_pressure_std(sblcl2000)
With all of that computed, I use the get_layer function to create the arrays of pressure, temp, dewpoint, and parcel temp I need to compute cape_cin, and then go to compute the actual cape_cin values in the layer of interest:
ml_LCL_CAPE_layer = get_layer(p_skewt, T_skewt, Td_skewt, ml_profile[::-1], bottom = mllcl[0], depth = mllcl[0] - mllcl2000_p)
mu_LCL_CAPE_layer = get_layer(p_skewt, T_skewt, Td_skewt, mu_profile[::-1], bottom = mulcl[0], depth = mulcl[0] - mulcl2000_p)
sb_LCL_CAPE_layer = get_layer(p_skewt, T_skewt, Td_skewt, pprof[::-1], bottom = sblcl[0], depth = sblcl[0] - sblcl2000_p)
mlLCLCAPE = cape_cin(ml_LCL_CAPE_layer[0], ml_LCL_CAPE_layer[1], ml_LCL_CAPE_layer[2], ml_LCL_CAPE_layer[3])
muLCLCAPE = cape_cin(mu_LCL_CAPE_layer[0], mu_LCL_CAPE_layer[1], mu_LCL_CAPE_layer[2], mu_LCL_CAPE_layer[3])
sbLCLCAPE = cape_cin(sb_LCL_CAPE_layer[0], sb_LCL_CAPE_layer[1], sb_LCL_CAPE_layer[2], sb_LCL_CAPE_layer[3])
mlLCLCAPEcin = mlLCLCAPE[0] + mlLCLCAPE[1]
muLCLCAPEcin = muLCLCAPE[0] + muLCLCAPE[1]
sbLCLCAPEcin = sbLCLCAPE[0] + sbLCLCAPE[1]
The arrays for pressure, temp, dewpoint, and parcel temp for each of the 3 get_layer calls appear to be populated with the correct values, and these 4 arrays are the same shape for each parcel type. The mlLCLCAPEcin calculation above gives the correct output (99.26 J/kg, which verifies when I plot it on a SkewT), but the exact same calculation for the MU and SB profiles gives the error referenced above. I'm using MetPy v1.1, and I have tried a different location and output from a different forecast hour, and still run into the same issue.
If I fix up your example code above (there were some name issues and some indexing issues) with:
T_sounding = (np.insert(Temp_up[0,:,ilat[0][0], ilon[1][0]].magnitude, ind, Temp_up[0,0,ilat[0][0], ilon[1][0]].magnitude) * Temp_up.units).to(Tdc_up.units)
Td_sounding = np.insert(Tdc_up.magnitude, ind, Tdc_up[0].magnitude) * Tdc_up.units
I don't get any error running right now. If my fix above doesn't fix it for you, it would be helpful to know if you're running the latest MetPy 1.1.
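(A quick way to confirm which version is installed, in case that turns out to be the issue:)

import metpy
print(metpy.__version__)  # compare against 1.1, the version the question targets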

How to find the best line fit in Python (Banister impulse-response model)

I have this formula that is used to predict athletic performance based on daily stress.
It is based on 5 constants unique to each person. I'm trying to find these from the daily stress and performance testing that has been done. I'm new to programming and I don't know where to start.
See the formula (written out to match the code below):
Performance(t) = k1*Fitness(t) - k2*Fatigue(t) + P0
where Fitness(t) = Fitness(t-1)*exp(-1/t1) + stress(t) and Fatigue(t) = Fatigue(t-1)*exp(-1/t2) + stress(t).
This is a sample of the data: data
Thank you!
import pandas as pd
import numpy as np
import math
from scipy import optimize

data = pd.read_csv('data_mod1.csv')
TSS = data['stress'].fillna(0)
arr = np.array(TSS)
#data = data.dropna()
a = [arr[0]]
b = [arr[0]]
x = arr[1:]

def Banister(x, t1, t2, k1, k2, c):
    for v in x:
        a.append(a[-1]*np.exp(-1/t1) + v)
        b.append(b[-1]*np.exp(-1/t2) + v)
    data['fit'] = pd.Series(a)
    data['fat'] = pd.Series(b)
    data['perf'] = ((data['fit']*k1) - (data['fat']*k2)) + c
    return data['perf']

from scipy.optimize import curve_fit
fit = curve_fit(Banister, arr, data[data.index], p0=[20, 10, 1, 2, 50])
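For what it's worth, one pitfall in the code above is that a and b are module-level lists that keep growing across the repeated calls curve_fit makes, and data[data.index] is not the measured performance series. A sketch of a self-contained model function under those assumptions (the 'perf_test' column name for measured performance is an assumption; use whatever your CSV calls it):

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit

data = pd.read_csv('data_mod1.csv')
stress = data['stress'].fillna(0).to_numpy()

def banister(stress, t1, t2, k1, k2, perf0):
    # Rebuild fitness/fatigue from scratch on every call, so repeated
    # curve_fit evaluations don't keep appending to shared lists.
    fit = np.zeros(len(stress))
    fat = np.zeros(len(stress))
    fit[0] = fat[0] = stress[0]
    for i in range(1, len(stress)):
        fit[i] = fit[i-1] * np.exp(-1/t1) + stress[i]
        fat[i] = fat[i-1] * np.exp(-1/t2) + stress[i]
    return k1 * fit - k2 * fat + perf0

perf = data['perf_test'].to_numpy()  # assumption: measured performance column
params, cov = curve_fit(banister, stress, perf, p0=[20, 10, 1, 2, 50])
print(params)  # fitted t1, t2, k1, k2, perf0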

How to continue after gradient descent?

I am very new to Data Science and Python. After a few hours of experimentation, I finally got values out of my gradient descent (code below). Now I am having trouble with the plotting: how can I plot the regression line automatically after the algorithm runs?
import numpy as np
import matplotlib.pyplot as plt
import csv
import pandas as pd

def gradient_descent(x, y):
    m_curr = b_curr = 0
    iterations = 5000
    n = len(x)
    learning_rate = 0.01
    for i in range(iterations):
        y_predicted = m_curr*x + b_curr
        cost = (1/n)*sum([val**2 for val in (y - y_predicted)])
        md = -(2/n)*sum(x*(y - y_predicted))
        bd = -(2/n)*sum(y - y_predicted)
        m_curr = m_curr - learning_rate*md
        b_curr = b_curr - learning_rate*bd
        print("m{}, b{}, cost {}, iteration {}".format(m_curr, b_curr, cost, i))

if __name__ == '__main__':
    # Reading data -> Output: DataFrame in float64
    data = pd.read_csv('ex1data1.txt', sep=',', header=None, names=['Feature', 'Label'])
    data.plot(x='Feature', y='Label', kind='scatter')
    # Separating the data frame
    feat_vec = pd.DataFrame(data['Feature'])
    label_vec = pd.DataFrame(data['Label'])
    # Finding the best-fit line for our given dataset; convert the df to np.array
    # because it's more convenient for matrix multiplication
    x = np.array(feat_vec)
    y = np.array(label_vec)
    gradient_descent(x, y)
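A minimal sketch of the plotting step: have gradient_descent return the final parameters (add `return m_curr, b_curr` at the end of the function), then draw the fitted line over the scatter plot:

m, b = gradient_descent(x, y)

plt.scatter(x, y, label='data')
x_line = np.linspace(x.min(), x.max(), 100)
plt.plot(x_line, m * x_line + b, color='red', label='fitted line')  # y = m*x + b
plt.legend()
plt.show()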

How to resolve Boolean value error in linear regression model in python?

I am trying to run a Fama-MacBeth regression in Python. As a first step I am running the time-series regression for every asset in my portfolio, but I am unable to run it because I am getting this error:
'ValueError: Must pass DataFrame with boolean values only'
I am relatively new to Python and have heavily relied on this forum to help me out. I hope you can help me with this issue.
Please let me know how I can resolve this. I will be very grateful to you!
I assume this line is producing the error, because when I run the function without the for loop, it works perfectly:
for i in range(cols):
    df_beta = RegressionRoll(df=data_set, subset=0, dependent=data_set.iloc[:,i],
                             independent=data_set.iloc[:,30:], const=True,
                             parameters='beta', win=12)
The dimensions of my matrix are 108x35: 30 stocks and 5 factors over 108 time points. Hence I want to run a regression for every stock against the factors and store the resulting coefficients in a dataframe. Sample dataframe:
Date       BAS GY    AI FP    SGL GY   LNA GY   AKZA NA   Market Factor
1/29/2010  -5.28%    -7.55%   -1.23%   -5.82%   -7.09%    -5.82%
2/26/2010   0.04%    13.04%   -1.84%    4.06%  -14.62%   -14.62%
3/31/2010  10.75%     1.32%    7.33%    6.61%   12.21%    12.21%
The following is the entire code:
import pandas as pd
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.stats.outliers_influence import variance_inflation_factor

data_set = pd.read_excel(r'C:\XXX\Research Project\Data\Regression.xlsx', sheet_name='Fama Macbeth')
data_set.set_index(data_set['Date'], inplace=True)
data_set.drop('Date', axis=1, inplace=True)
X = data_set.iloc[:,30:]
y = data_set.iloc[:,:30]

def RegressionRoll(df, subset, dependent, independent, const, win, parameters):
    # Data subset
    if subset != 0:
        df = df.tail(subset)
    else:
        df = df
    # Loop info
    end = df.shape[0]
    win = win
    rng = np.arange(start=win, stop=end, step=1)
    # Subset and store dataframes
    frames = {}
    n = 1
    for i in rng:
        df_temp = df.iloc[:i].tail(win)
        newname = 'df' + str(n)
        frames.update({newname: df_temp})
        n += 1
    # Analysis on subsets
    df_results = pd.DataFrame()
    for frame in frames:
        #print(frames[frame])
        # Rolling data frames
        dfr = frames[frame]
        y = dependent
        x = independent
        if const == True:
            x = sm.add_constant(dfr[x])
            model = sm.OLS(dfr[y], x).fit()
        else:
            model = sm.OLS(dfr[y], dfr[x]).fit()
        if parameters == 'beta':
            theParams = model.params[0:]
            coefs = theParams.to_frame()
            df_temp = pd.DataFrame(coefs.T)
            indx = dfr.tail(1).index[-1]
            df_temp['Date'] = indx
            df_temp = df_temp.set_index(['Date'])
            df_results = pd.concat([df_results, df_temp], axis=0)
        if parameters == 'R2':
            theParams = model.rsquared
            df_temp = pd.DataFrame([theParams])
            indx = dfr.tail(1).index[-1]
            df_temp['Date'] = indx
            df_temp = df_temp.set_index(['Date'])
            df_temp.columns = [', '.join(independent)]
            df_results = pd.concat([df_results, df_temp], axis=0)
    return df_results

cols = len(y.columns)
for i in range(cols):
    df_beta = RegressionRoll(df=data_set, subset=0, dependent=data_set.iloc[:,i],
                             independent=data_set.iloc[:,30:], const=True,
                             parameters='beta', win=12)
ValueError: Must pass DataFrame with boolean values only
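For what it's worth, pandas raises exactly this error when a DataFrame is used to index another DataFrame and it is not boolean, which is what happens in dfr[y] / dfr[x] once dependent and independent are passed in as data rather than as column labels. A sketch of the call with labels instead (assuming, per the description above, that the first 30 columns are stocks and the remaining ones are factors):

# Hypothetical fix: pass column *names*, so dfr[y] / dfr[x] select columns of the
# rolling window instead of masking it with a non-boolean DataFrame.
dep_cols = data_set.columns[:30]
indep_cols = list(data_set.columns[30:])

for col in dep_cols:
    df_beta = RegressionRoll(df=data_set, subset=0, dependent=col,
                             independent=indep_cols, const=True,
                             parameters='beta', win=12)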
