calling Matlab's portfolio optimizer via Python failed - python

Here I estimated my asset returns and covariances in Python and want to use Matlab's portfolio optimizer to get a portfolio with maximum Sharpe ratio
in Matlab I have a function
function pwgt= calculate_portfolio(r0,m,C,lb,ub,target)
p = Portfolio('RiskFreeRate', r0);
p = setAssetMoments(p, m, C);
p = setBounds(p,lb,ub);
p = setBudget(p,target,target);
pwgt = estimateMaxSharpeRatio(p);
In Python I have
import matlab
import matlab.engine
eng = matlab.engine.connect_matlab()
test_mean = matlab.double(mat_mean.values.tolist())
test_cov = matlab.double(mat_cov.values.tolist())
result = eng.calculate_portfolio(0.03, test_mean, test_cov, -1, 1, 1)
Where test_mean is a 5*1 pandas series, test_cov is a 5*5 covariance matrix (stored as pandas dataframe).
However, when I call the function in Python, the error goes as:
MatlabExecutionError:
File /Applications/MATLAB_R2016b.app/toolbox/optim/optim/linprog.m, line 158, in linprog
File /Applications/MATLAB_R2016b.app/toolbox/finance/finance/#Portfolio/private/mv_estimate_lower_bound.p, line 0, in mv_estimate_lower_bound
File /Applications/MATLAB_R2016b.app/toolbox/finance/finance/#Portfolio/private/mv_optim_transform.p, line 0, in mv_optim_transform
File /Applications/MATLAB_R2016b.app/toolbox/finance/finance/#Portfolio/estimateFrontierLimits.m, line 70, in estimateFrontierLimits
File /Applications/MATLAB_R2016b.app/toolbox/finance/finance/#Portfolio/estimateMaxSharpeRatio.m, line 33, in estimateMaxSharpeRatio
File /Users/Michael/Documents/MATLAB/calculate_portfolio.m, line 6, in calculate_portfolio
LINPROG requires the following inputs to be of data type double: 'b','LB'.
How do I solve this problem.

Related

How to implement multiprocessing in a for loop inside a function

I've built some code to minimize the sum of the weighted least squares of some residuals. I first read all the data from a .gz file and then process it on the code below (details are irrelevant). I want to use multiprocessing in order to speed up the "runFit" function.
My code is below:
"""
Fit 3D lines to cylinders
"""
from timeit import default_timer as timer
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
from scipy.optimize import minimize
from numba import jit
from multiprocessing import Pool
def readData(filename):
"Read compressed data."
return np.loadtxt(filename, delimiter=",")
#jit(nopython=True)
def weightedResiduals(unknown, wire_coords, radii, d_radii, d_zcoords):
"Calculates the sum of the weighted residuals"
y_intercept = unknown[0]
z_intercept = unknown[1]
xy_slope = unknown[2]
xz_slope = unknown[3]
intercept_vector = np.array([0, y_intercept, z_intercept])
gradient_vector = np.array([1, xy_slope, xz_slope])
gradient_vector /= np.linalg.norm(gradient_vector)
result = 0
for index in range(np.shape(wire_coords)[0]):
distance = np.linalg.norm(np.cross((wire_coords[index]-intercept_vector), gradient_vector)) - radii[index]
weight = (d_radii[index]**2 + d_zcoords[index]**2)**(-1/2)
result += (weight * distance)**2
return result
def runFit(inputfilename, outputfilename):
"""
Parameters
----------
inputfilename : string
input data file name for fitting.
outputfilename : string
result storage file name.
Returns
-------
counter : int
number of successful fits; 100% would be twice the number
of events (two lines per event).
"""
counter = 0
#Reading the required data set
fulldata = readData(inputfilename)
#Defining the output array and filling in the first two columns
event_no = int(fulldata[-1,0])
result = np.zeros((2*event_no, 10))
result[:,0] = np.repeat(np.arange(1, event_no+1), 2)
line_no_array = np.empty((2*event_no,))
line_no_array[::2] = 1
line_no_array[1::2] = 2
result[:,1] = line_no_array
def singleEventFit(event):
#Using masking to obtain required rows
mask = (fulldata==event)
desired_rows = mask[:, 0]
#Calculating the fitted line variables using weighted least squares
for line in range(1,3):
#Extracting the desired rows from the full data array
desired_array = fulldata[np.logical_and(desired_rows,(fulldata==line)[:,1])]
#Extracting grouped data from the desired rows
wire_coords = desired_array[:,2:5]
wire_x_coords = wire_coords[:,0]
wire_y_coords = wire_coords[:,1]
wire_z_coords = wire_coords[:,2]
radii = desired_array[:,5]
d_radii, d_zcoords = desired_array[:,6], desired_array[:,7]
#Estimating an initial guess for the fitted line variables
x_min_index = np.argmin(np.abs(wire_x_coords))
x_max_index = np.argmax(np.abs(wire_x_coords))
y_intercept_guess = wire_y_coords[x_min_index]
z_intercept_guess = wire_z_coords[x_min_index]
xy_slope_guess = (wire_y_coords[x_max_index]-wire_y_coords[x_min_index])/(wire_x_coords[x_max_index]-wire_x_coords[x_min_index])
xz_slope_guess = (wire_z_coords[x_max_index]-wire_z_coords[x_min_index])/(wire_x_coords[x_max_index]-wire_x_coords[x_min_index])
init = np.array([y_intercept_guess, z_intercept_guess, xy_slope_guess, xz_slope_guess])
#Minimizing the sum of the weighted residuals
fit_vars = minimize(weightedResiduals, init, args=(wire_coords, radii, d_radii, d_zcoords), tol=1e-5)
if fit_vars.success == True:
y_intercept, z_intercept = fit_vars.x[0], fit_vars.x[1]
xy_slope, xz_slope = fit_vars.x[2], fit_vars.x[3]
#Using the half of the inverse of the Hessian matrix as the covariance matrix to recover errors
std_array = np.sqrt(np.diag(0.5*fit_vars.hess_inv))
#Inputting the variables and their errors on the output array
result[2*event+line-3, 2], result[2*event+line-3, 4] = y_intercept, xy_slope
result[2*event+line-3, 6], result[2*event+line-3, 8] = z_intercept, xz_slope
result[2*event+line-3, 3], result[2*event+line-3, 5] = std_array[0], std_array[2]
result[2*event+line-3, 7], result[2*event+line-3, 9] = std_array[1], std_array[3]
with Pool() as pool:
pool.map(singleEventFit, [event for event in range(1, event_no+1)])
#Returning resulting array as a text file
np.savetxt(outputfilename, result, delimiter=',')
return counter
start = timer()
if __name__=='__main__':
print("Successful Plots: " + str(runFit("tendata.txt.gz", "output.txt.gz")))
end = timer()
print("Time: " + str(end-start) + "s")
However, I get the following traceback:
Traceback (most recent call last):
File "C:\Users\vanes\Downloads\Python Project\untitled0.py", line 113, in <module>
print("Successful Plots: " + str(runFit("tendata.txt.gz", "output.txt.gz")))
File "C:\Users\vanes\Downloads\Python Project\untitled0.py", line 105, in runFit
pool.map(singleEventFit, [event for event in range(1, event_no+1)])
File "C:\Users\vanes\anaconda3\lib\multiprocessing\pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Users\vanes\anaconda3\lib\multiprocessing\pool.py", line 771, in get
raise self._value
File "C:\Users\vanes\anaconda3\lib\multiprocessing\pool.py", line 537, in _handle_tasks
put(task)
File "C:\Users\vanes\anaconda3\lib\multiprocessing\connection.py", line 211, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "C:\Users\vanes\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
AttributeError: Can't pickle local object 'runFit.<locals>.singleEventFit'
Is there any way that I can use multiprocessing in order to speed up the for-loop?
After reviewing the internet, the recommendation was to move the inner function outside and make it global. However, this can't work since I need variables defined inside "runFit()" in order to execute the loop.

Error with pymc3 sampler in pypesto: theano.graph.fg MissingInputError

I am tackling a bayesian inference problem and am having trouble using a pymc3 sampler provided by pypesto on my windows laptop. To make sure I can run with the sampler I create a simple dummy objective to use.
I install create a conda (I tried both 3.7 & 3.8) environment and install the pymc3 and theano modules using pip3/pip. I've tried several different versions of both pymc3/theano and managed to import them succesfully. However, there is an error message I cannot figure out how to go around. I have tried looking online for a solution but was not able to find it either. I currently have the latest versions of pymc3 and theano installed (3.11.0 and 1.0.5 respectively). This is the final line of the message
theano.graph.fg.MissingInputError: Input 0 of the graph (indices start from 0), used to compute sigmoid(x2_interval__), was not provided and not given a value. Use the Theano flag exception_verbosity='high', for more information on this error.
Here is the full message:
Sampling 1 chain for 1_000 tune and 100 draw iterations (1_000 + 100 draws total) took 7 seconds.
Traceback (most recent call last):
File "samplingPymc3.py", line 70, in <module>
result2 = sample.sample(problem1, 100, sampler2, x0=np.array([0,0]))
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\pypesto\sample\sample.py", line 68, in sample
sampler.sample(n_samples=n_samples)
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\pypesto\sample\pymc3.py", line 102, in sample
**self.options)
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\pymc3\sampling.py", line 637, in sample
idata = arviz.from_pymc3(trace, **ikwargs)
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\arviz\data\io_pymc3.py", line 559, in from_pymc3
density_dist_obs=density_dist_obs,
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\arviz\data\io_pymc3.py", line 163, in __init__
self.observations, self.multi_observations = self.find_observations()
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\arviz\data\io_pymc3.py", line 176, in find_observations
multi_observations[key] = val.eval() if hasattr(val, "eval") else val
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\theano\graph\basic.py", line 554, in eval
self._fn_cache[inputs] = theano.function(inputs, self)
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\theano\compile\function\__init__.py", line 350, in function
output_keys=output_keys,
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\theano\compile\function\pfunc.py", line 532, in pfunc
output_keys=output_keys,
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\theano\compile\function\types.py", line 1978, in orig_function
name=name,
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\theano\compile\function\types.py", line 1584, in __init__
fgraph, additional_outputs = std_fgraph(inputs, outputs, accept_inplace)
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\theano\compile\function\types.py", line 188, in std_fgraph
fgraph = FunctionGraph(orig_inputs, orig_outputs, update_mapping=update_mapping)
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\theano\graph\fg.py", line 162, in __init__
self.import_var(output, reason="init")
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\theano\graph\fg.py", line 330, in import_var
self.import_node(var.owner, reason=reason)
File "C:\Users\germa\anaconda3\envs\sampling\lib\site-packages\theano\graph\fg.py", line 383, in import_node
raise MissingInputError(error_msg, variable=var)
theano.graph.fg.MissingInputError: Input 0 of the graph (indices start from 0), used to compute sigmoid(x2_interval__), was not provided and not given a value. Use the Theano flag exception_verbosity='high', for more information on this error.
I read somewhere that the issue may lie with the version of arviz used but that does not appear to be the issue in my case.
I wanted to include the script I am running. Here is the code for the script:
import numpy as np
import scipy as sp
import scipy.optimize as so
from scipy.stats import multivariate_normal
import pypesto
import pypesto.sample as sample
from pypesto import Objective
A = np.array([[2.0, 0.0], [0.0, 1.0]])
b = np.array([2.0, 1.0])
x_init = np.array([3.4302, 2.915])
x_true = np.array([1.0, 1.0])
temp = lambda x: A.dot(x) - b
f = lambda x: .5 * np.linalg.norm(temp(x))
A_t = A.transpose()
K = np.dot(A_t, A)
df = lambda x: K.dot(x) - A_t.dot(b)
def obj1(x):
# f_val = f(x)
# grad = df(x)
return (f(x), df(x))
objfun = lambda x: obj1(x)
dim_full = 2
lb = -10 * np.ones((dim_full, 1))
ub = 10 * np.ones((dim_full, 1))
x_names = ['x1', 'x2']
# step_fcn = pymc3.step_methods.hmc.hmc.HamiltonianMC
objective = pypesto.Objective(fun=objfun, grad=True, hess=False)
problem1 = pypesto.Problem(objective=objective, lb=lb, ub=ub, x_names=x_names)
sampler = sample.AdaptiveMetropolisSampler()
print('function val: ', objfun(x_init))
sampler2 = sample.Pymc3Sampler()
result2 = sample.sample(problem1, 100, sampler2, x0=np.array([0, 0]))
print('Done sampling!')
Thank you in advance for any help!
pymc3 support of pypesto is at the moment limited, as it was implemented at a time when theano was discontinued in favor of aesara in pymc3. Thus, pypesto only supports specific version of the involved tools, specifically
arviz >= 0.8.1, < 0.9.0
theano >= 1.0.4
packaging >= 20.0
pymc3 >= 3.8, < 3.9.2
(see https://github.com/ICB-DCM/pyPESTO/blob/main/setup.cfg#L111).
The switch to full aesara and later pymc3 version support is underway, but not out yet.

non linear optimization with incomplete gamma function in constraint in scipy

I want to solve the following optimization problem with scipy (python3):
where I[.,.] is the "incomplete gamma function" (scipy.special.gammainc)
I followed the scipy optimization guide and came up with this code:
from scipy.special import gammainc
from scipy.optimize import NonlinearConstraint
from scipy.optimize import BFGS
from scipy.optimize import minimize
import numpy as np
q = 200
k = 1.5625
alpha = 0.03125
def func(s):
return s
def constraints(s):
return q - k/alpha * (gammainc(k+1, s*alpha) + gammainc(k+1, (s+q)*alpha)) + s*gammainc(k, s*alpha) + (s+q)*gammainc(k, (s+q)*alpha)
nonlinear_constraint = NonlinearConstraint(constraints, -np.inf, 10.0, jac='2-point', hess=BFGS())
s0 = 1000
res = minimize(func, s0, method='trust-constr', jac='2-point', hess=BFGS(), constraints=[nonlinear_constraint], options={'verbose': 1})
print(res.x)
when I run the code, I get this (error) output:
/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/optimize/_hessian_update_strategy.py:187: UserWarning: delta_grad == 0.0. Check if the approximated function is linear. If the function is linear better results can be obtained by defining the Hessian as zero instead of using quasi-Newton approximations.
'approximations.', UserWarning)
Traceback (most recent call last):
File "s_opt.py", line 23, in <module>
res = minimize(func, s0, method='trust-constr', jac='2-point', hess=BFGS(), constraints=[nonlinear_constraint], options={'verbose': 1})
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/optimize/_minimize.py", line 622, in minimize
callback=callback, **options)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/optimize/_trustregion_constr/minimize_trustregion_constr.py", line 519, in _minimize_trustregion_constr
factorization_method)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/optimize/_trustregion_constr/tr_interior_point.py", line 329, in tr_interior_point
factorization_method, trust_lb, trust_ub, subprob.scaling)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/optimize/_trustregion_constr/equality_constrained_sqp.py", line 121, in equality_constrained_sqp
lb_t, ub_t)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/optimize/_trustregion_constr/qp_subproblem.py", line 499, in projected_cg
r = Z.dot(H.dot(x) + c)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/sparse/linalg/interface.py", line 415, in dot
return self.matvec(x)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/sparse/linalg/interface.py", line 229, in matvec
y = self._matvec(x)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/sparse/linalg/interface.py", line 527, in _matvec
return self.__matvec_impl(x)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/optimize/_trustregion_constr/projections.py", line 193, in null_space
aux2 = scipy.linalg.solve_triangular(R, aux1, lower=False)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/linalg/basic.py", line 336, in solve_triangular
b1 = _asarray_validated(b, check_finite=check_finite)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/scipy/_lib/_util.py", line 246, in _asarray_validated
a = toarray(a)
File "/home/andreasziegler/miniconda3/lib/python3.7/site-packages/numpy/lib/function_base.py", line 499, in asarray_chkfinite
"array must not contain infs or NaNs")
ValueError: array must not contain infs or NaNs
which tells me that I did something wrong. It's the first time I use the optimizer in scipy and therefore chances are high, that I misunderstand something (essential). I'm glad for any hint.

How do I construct design matrix in NumPy (for linear regression)?

For this lab I need to sample 150 x-values from a Normal distribution using a mean of 0 and standard deviation of 10, then from the x-values construct a design matrix using the features {1,x,x^2}.
We have to sample parameters and then use the design matrix to create y values for regression data.
The problem is that my design matrix isn't square, and the Moore-Penrose Pseduoinverse needs square matrices, but I don't know how to get that to work given the earlier setup of the lab?
This is what I've done
#Linear Regression Lab
import numpy as np
import math
data = np.random.normal(0, 10, 150)
design_matrix = np.zeros((150,3))
for i in range(150):
design_matrix[i][0] = 1
design_matrix[i][1] = data[i]
design_matrix[i][2] = pow(data[i], 2)
print("-------------------Design Matrix---------------------")
print("|--------1--------|-------x-------|--------x^2--------|")
print(design_matrix[:20])
#sampling paramters
theta_0 = np.random.uniform(low = -30, high = 20)
theta_1 = np.random.uniform(low = -30, high = 20)
theta_2 = np.random.uniform(low = -30, high = 20)
print(theta_0, theta_1, theta_2)
theta = np.array([theta_0, theta_1, theta_2])
theta = np.transpose(theta)
#moore penrose psuedo inverse
MPpi = np.linalg.pinv(design_matrix) ##problem here
y_values = np.linalg.inv(MPpi)
Feel free to edit this incomplete answer
After running this code on Repl, I got the following error message
Traceback (most recent call last):
File "main.py", line 32, in <module>
y_values = np.linalg.inv(MPpi)
File "<__array_function__ internals>", line 5, in inv
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.8/site-packages/numpy/linalg/linalg.py", line 542, in inv
_assert_stacked_square(a)
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.8/site-packages/numpy/linalg/linalg.py", line 213, in _assert_stacked_square
raise LinAlgError('Last 2 dimensions of the array mustbe square')
numpy.linalg.LinAlgError: Last 2 dimensions of the array must be square
The first error propagates from taking the inverse of MPpi
By looking at the docs, it seems that pinv switches the last two dimensions [e.g., an m x n matrix becomes n x m], so we will need to format the matrix before calculating the psuedoinverse
As far as the Moore Penrose inverse AKA pinv is concerned, this article suggests that multiplying MPpi*data, which will yield x_0 {notation from Ross MacAusland}, which is the best fit for your least squares regression.

Interpolate a discontinuous function with Scipy

I am having problems interpolating some data points using Scipy. I guess that it might depend on the fact that the function I'm trying to interpolate is discontinuous at x roughly 4.
Here is the code I'm using to interpolate:
from scipy import *
y_interpolated = interp1d(x,y,buonds_error=False,fill_value=0.,kind='cubic')
new_x_array = arange(min(x),max(x),0.05)
plot(new_x_array,x_interpolated(new_x_array),'r-')
The error I get is
File "<stdin>", line 2, in <module>
File "/Library/Frameworks/EPD64.framework/Versions/7.1/lib/python2.7/site-packages/scipy/interpolate/interpolate.py", line 357, in __call__
out_of_bounds = self._check_bounds(x_new)
File "/Library/Frameworks/EPD64.framework/Versions/7.1/lib/python2.7/site-packages/scipy/interpolate/interpolate.py", line 415, in _check_bounds
raise ValueError("A value in x_new is above the interpolation "
ValueError: A value in x_new is above the interpolation range.
These are my data points:
1.56916432074 -27.9998263169
1.76773750527 -27.6198430485
1.98360238449 -27.2397962268
2.25133982943 -26.8596491107
2.49319293195 -26.5518194791
2.77823462692 -26.1896935372
3.07201297519 -25.9540514619
3.46090507092 -25.7362456112
3.65968688527 -25.6453922172
3.84116464506 -25.53652509
3.97070419447 -25.3374215879
4.03087127145 -24.8493356465
4.08217147954 -24.0540196233
4.12470899596 -23.0960856364
4.17612639206 -22.4634289328
4.19318305992 -22.1380894034
4.2708234589 -21.902951035
4.3745696768 -21.9027079759
4.52158254627 -21.9565591238
4.65985875536 -21.8839570732
4.80666329863 -21.6486676004
4.91026629192 -21.4496126386
5.05709528961 -21.2685401725
5.29054655428 -21.2860476871
5.54129211534 -21.3215908912
5.73174988353 -21.6645019816
6.06035782465 -21.772138994
6.30243916407 -21.7715483093
6.59656410998 -22.0238656166
6.86481948673 -22.3665921479
7.01182409559 -22.4385289076
7.17609125906 -22.4200564296
7.37494987052 -22.4376476472
7.60844044988 -22.5093814451
7.79869207061 -22.5812017094
8.00616642549 -22.5445612485
8.17903446593 -22.4899243886
8.29141325457 -22.4715846981
What version of scipy are you using?
The script you posted has some syntax errors (I assume due to wrong copy and paste).
This script works, with scipy.__version__ == 0.9.0. .
import sys
from scipy import *
from scipy.interpolate import *
from pylab import plot
x = []
y = []
for line in sys.stdin:
a, b = line.split()
x.append(float(a))
y.append(float(b))
y_interpolated = interp1d(x,y,bounds_error=False,fill_value=0.,kind='cubic')
new_x_array = arange(min(x),max(x),0.05)
plot(new_x_array,y_interpolated(new_x_array),'r-')

Categories