How to load data as a matrix (using csv file) into python? - python

I would like to solve an LPP (Max CX s.to. AX<=B, X>=0) using pyomo, I have loaded the data for C, and B. However I tried to load the matrix A but after solving the model it does not show the correct answer. The code that I tried is in the following lines. Please suggest how to solve this type of problem in pyomo.
from pathlib import Path
from pyomo.environ import *
import pandas as pd
import numpy as np
path_model_in_set = Path('tmp/input/set')
path_model_in_par = Path('tmp/input/par')
path_model_out = Path('tmp/output')
path_results = Path('tmp/output')
m = AbstractModel('LPP')
#Set
m.a = Set(ordered=True, doc="indices for row")
m.b = Set(ordered=True, doc="indices for column")
#Parameters
m.A = Param(m.a, m.b, mutable=True, initialize=0, doc="Matrix")
m.B = Param(m.a, doc="RHS")
m.C = Param(m.b, doc="cost coefficients")
#variable
m.x = Var(m.b, within=NonNegativeReals)
def obj_rule(m):
return sum(m.C[t]*m.x[t] for t in m.b)
m.object_f = Objective(rule=obj_rule, sense=maximize, doc="Objective function")
def cons_rule(m,i):
return sum(m.A[i,j]*m.x[j] for j in m.b) <= m.B[i]
m.cons1 = Constraint(m.a, rule=cons_rule, doc="constraints")
data = DataPortal()
data.load(filename=str(path_model_in_set.joinpath('a.csv')), format='set', set='a')
data.load(filename=str(path_model_in_set.joinpath('b.csv')), format='set', set='b')
#the problem is in this line.
data. Load(filename=str(path_model_in_par.joinpath('Matrix_info.csv')), format='array', index=['A'], param=['1','2','3'])
data. Load(filename=str(path_model_in_par.joinpath('B.csv')), index=['a'], param=['B'])
data.load(filename=str(path_model_in_par.joinpath('C.csv')), index=['b'], param=['C'])
instance = m.create_instance(data)
print(instance.A)
optimizer = 'gurobi'
solver = SolverFactory(optimizer)
instance.write('LPP.lp', io_options={'symbolic_solver_labels': True})
solver_results = solver.solve(instance, tee=True)
solver_results.write()
instance.solutions.load_from(solver_results)
CSV files for matrix A is
A 1 2 3
1 2 1 1
2 1 2 1
3 0 0 1

Related

Convert Matlab code to Python code using proper syntax

I have this simple piece of Matlab code below I like to convert over to Python.What is the correct Python syntax for the last line? I have also posted my Python attempt below this little snippet.
%variables
x=\[1 2 3 4 5 6\]
l_Vector =5
memlen = 2
deg =2
n= 1
k=1
% create 5 x 4
x_val = np.zeros( (l_Vector,memlen\*deg) )
x_val(n,(k - 1)*memlen+(1:memlen)) = (x(n:(n + memlen-1)).*(abs(x(n:(n + memlen -1))).^(k - 1))).';
type here
I tried the following : breaking the last line down.
from scipy import linalg
import numpy as np
x= np.array([1,2,3,4,5,6])
l_Vector =5
memlen = 2
deg =2
x_terms = np.zeros( (l_Vector,memlendeg) )
term_1 = x[n:(n+memorylen)]
term_2 = np.abs(x[n:(n+memorylen)])**k
pp = np.multiply(term_1 ,term_2)
x_terms[n,(k)(memorylen) ,2] = pp.T

How to add contrains in python when optimizing a portfolio?

I am new to python and does not have a lot of experience.
I am trying to add constrains in this code so that the weights of one stock cannot be equal to 0, but the weights of the same stock cannot be above 5% either. This constrain will be valid for all the stocks.
This is what I have so far, is there anyone that have any tips on how to add these constrains?
Thanks in advance!
import os
import pandas as pd
import numpy as np
from scipy.optimize import linprog
data = pd.read_excel("data.xlsm")
# change data['WGT_GLOBAL'] s.t. EUTax = 20
data['Weights screened'] = data['WGT_GLOBAL']*data['Positiv screening']
EUTax = (data['Weights screened']*data['EU tax']).sum()
# min = -(x*data['YTD Return']).sum()
# s.t. x >= 0, x <= 1, (x*data['Positiv screening']*data['EU tax']).sum() = 20
obj = -(data['YTD Return'].fillna(0).to_numpy())
bnd = [(0,1)]
lhs_eq = [data['Positiv screening']*data['EU tax'].to_numpy(),np.ones(len(data))]
rhs_eq = [[20],[1]]
opt = linprog(c=obj, A_eq=lhs_eq, b_eq=rhs_eq, bounds=bnd, method="revised simplex")
optimal_weights = opt.x

How to resolve Boolean value error in linear regression model in python?

I am trying to run a fama-macbeth regression in a python. As afirst step I am running the time series for every asset in my portfolio but I am unable to run it because I am getting an error:
'ValueError: Must pass DataFrame with boolean values only'
I am relatively new to python and have heavily relied on this forum to help me out. I hope it you can help me with this issue.
Please let me know how I can resolve this. I will be very grateful to you!
I assume this line is producing the error. Cause when I run the function without the for loop, it works perfectly.
for i in range(cols):
df_beta = RegressionRoll(df=data_set, subset = 0, dependent = data_set.iloc[:,i], independent = data_set.iloc[:,30:], const = True, parameters = 'beta',
win = 12)
The dimension of my matrix is 108x35, 30 stocks and 5 factors over 108 points. Hence I want to run a regression for every stock against the 4 factors and store the result of the coeffs in a dataframe. Sample dataframe:
Date BAS GY AI FP SGL GY LNA GY AKZA NA Market Factor
1/29/2010 -5.28% -7.55% -1.23% -5.82% -7.09% -5.82%
2/26/2010 0.04% 13.04% -1.84% 4.06% -14.62% -14.62%
3/31/2010 10.75% 1.32% 7.33% 6.61% 12.21% 12.21%
The following is the entire code:
import pandas as pd
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.stats.outliers_influence import variance_inflation_factor
data_set = pd.read_excel(r'C:\XXX\Research Project\Data\Regression.xlsx', sheet_name = 'Fama Macbeth')
data_set.set_index(data_set['Date'], inplace=True)
data_set.drop('Date', axis=1, inplace=True)
X = data_set.iloc[:,30:]
y = data_set.iloc[:,:30]
def RegressionRoll(df, subset, dependent, independent, const, win, parameters):
# Data subset
if subset != 0:
df = df.tail(subset)
else:
df = df
# Loopinfo
end = df.shape[0]
win = win
rng = np.arange(start = win, stop = end, step = 1)
# Subset and store dataframes
frames = {}
n = 1
for i in rng:
df_temp = df.iloc[:i].tail(win)
newname = 'df' + str(n)
frames.update({newname: df_temp})
n += 1
# Analysis on subsets
df_results = pd.DataFrame()
for frame in frames:
#print(frames[frame])
# Rolling data frames
dfr = frames[frame]
y = dependent
x = independent
if const == True:
x = sm.add_constant(dfr[x])
model = sm.OLS(dfr[y], x).fit()
else:
model = sm.OLS(dfr[y], dfr[x]).fit()
if parameters == 'beta':
theParams = model.params[0:]
coefs = theParams.to_frame()
df_temp = pd.DataFrame(coefs.T)
indx = dfr.tail(1).index[-1]
df_temp['Date'] = indx
df_temp = df_temp.set_index(['Date'])
df_results = pd.concat([df_results, df_temp], axis = 0)
if parameters == 'R2':
theParams = model.rsquared
df_temp = pd.DataFrame([theParams])
indx = dfr.tail(1).index[-1]
df_temp['Date'] = indx
df_temp = df_temp.set_index(['Date'])
df_temp.columns = [', '.join(independent)]
df_results = pd.concat([df_results, df_temp], axis = 0)
return(df_results)
cols = len(y.columns)
for i in range(cols):
df_beta = RegressionRoll(df=data_set, subset = 0, dependent = data_set.iloc[:,i], independent = data_set.iloc[:,30:], const = True, parameters = 'beta',
win = 12)
ValueError: Must pass DataFrame with boolean values only

Creating a vector of values based off a test using a for loop

This feels like it should be a simple problem but I am newer to python, in R i would use a foreach loop that gave me an option to combine.
I have tried a for loop that lets me print out all the values i need but i want them collected into a vector of values that i can use later.
from scipy.stats import gamma
import scipy.stats as stats
import numpy as np
import random
data2 = np.random.gamma(1,2, size = 500)
gammT = np.log(data2 + 1)
mean = np.mean(gammT)
sd = np.std(gammT)
a = (mean/ sd)**2
b = (sd**2)/ mean
for i in range(1,100):
gammT = random.sample(list(gammT), 500)
gamm = np.random.gamma(a,b, size = len(gammT))
s = stats.anderson_ksamp([gammT,gamm])
s = s[2]
print(s)
So i am able to print all the values i want but i want them all to be gathered together in a vector of values. I have tried to append and make lists but am not able to get them together.
from scipy.stats import gamma
import scipy.stats as stats
import numpy as np
import random
gammT = np.log(data2.iScore + 1)
mean = np.mean(gammT)
sd = np.std(gammT)
a = (mean/ sd)**2
b = (sd**2)/ mean
#initialize empty list
result=[]
for i in range(100):
# removed (1,100) you only need range(100) for 100 elements
gammT = random.sample(list(gammT), 500)
gamm = np.random.gamma(a,b, size = len(gammT))
s = stats.anderson_ksamp([gammT,gamm])
s = s[2]
#append calculation to list
result.append(s)
print(s)
print(result)

Simple Linear Regression Error in updating cost function and Theta Parameters

I wrote a piece code to make a simple linear regression model using Python. However, I am having trouble getting the correct cost function, and most importantly the correct theta parameters. The model is implemented from scratch and not using Scikit learn module. I have used Andrew NG's notes from his ML Coursera course to create the model. The correct values of theta are [[-3.630291] [1.166362]].
Would be really grateful if someone could offer their expertise, and point out what I'm doing wrong.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#Load The Dataset
dataset = pd.read_csv("Population vs Profit.txt",names=["Population" ,
"Profit"])
print (dataset.head())
col = len(dataset.columns)
x = dataset.iloc[:,:col-1].values
y = dataset.iloc[:,col-1].values
#Visualizing The Dataset
plt.scatter(x, y, color="red", marker="x", label="Profit")
plt.title("Population vs Profit")
plt.xlabel("Population")
plt.ylabel("Profit")
plt.legend()
plt.show()
#Preprocessing Data
dataset.insert(0,"x0",1)
col = len(dataset.columns)
x = dataset.iloc[:,:col-1].values
b = np.zeros(col-1)
m = len(y)
costlist = []
alpha = 0.001
iteration = 10000
#Defining Functions
def hypothesis(x,b,y):
h = x.dot(b.T) - y
return h
def cost(x,b,y,m):
j = np.sum(hypothesis(x,b,y)**2)
j = j/(2*m)
return j
print (cost(x,b,y,m))
def gradient_descent(x,b,y,m,alpha):
for i in range (iteration):
h = hypothesis(x,b,y)
product = np.sum(h.dot(x))
b = b - ((alpha/m)*product)
costlist.append(cost(x,b,y,m))
return b,cost(x,b,y,m)
b , mincost = gradient_descent(x,b,y,m,alpha)
print (b , mincost)
print (cost(x,b,y,m))
plt.plot(b,color="green")
plt.show()
The dataset I'm using is the following text.
6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
7.4764,4.3483
8.5781,12
6.4862,6.5987
5.0546,3.8166
5.7107,3.2522
14.164,15.505
5.734,3.1551
8.4084,7.2258
5.6407,0.71618
5.3794,3.5129
6.3654,5.3048
5.1301,0.56077
6.4296,3.6518
7.0708,5.3893
6.1891,3.1386
20.27,21.767
5.4901,4.263
6.3261,5.1875
5.5649,3.0825
18.945,22.638
12.828,13.501
10.957,7.0467
13.176,14.692
22.203,24.147
5.2524,-1.22
6.5894,5.9966
9.2482,12.134
5.8918,1.8495
8.2111,6.5426
7.9334,4.5623
8.0959,4.1164
5.6063,3.3928
12.836,10.117
6.3534,5.4974
5.4069,0.55657
6.8825,3.9115
11.708,5.3854
5.7737,2.4406
7.8247,6.7318
7.0931,1.0463
5.0702,5.1337
5.8014,1.844
11.7,8.0043
5.5416,1.0179
7.5402,6.7504
5.3077,1.8396
7.4239,4.2885
7.6031,4.9981
6.3328,1.4233
6.3589,-1.4211
6.2742,2.4756
5.6397,4.6042
9.3102,3.9624
9.4536,5.4141
8.8254,5.1694
5.1793,-0.74279
21.279,17.929
14.908,12.054
18.959,17.054
7.2182,4.8852
8.2951,5.7442
10.236,7.7754
5.4994,1.0173
20.341,20.992
10.136,6.6799
7.3345,4.0259
6.0062,1.2784
7.2259,3.3411
5.0269,-2.6807
6.5479,0.29678
7.5386,3.8845
5.0365,5.7014
10.274,6.7526
5.1077,2.0576
5.7292,0.47953
5.1884,0.20421
6.3557,0.67861
9.7687,7.5435
6.5159,5.3436
8.5172,4.2415
9.1802,6.7981
6.002,0.92695
5.5204,0.152
5.0594,2.8214
5.7077,1.8451
7.6366,4.2959
5.8707,7.2029
5.3054,1.9869
8.2934,0.14454
13.394,9.0551
5.4369,0.61705
One issue is with your "product". It is currently a number when it should be a vector. I was able to get the values [-3.24044334 1.12719788] by rerwitting your for-loop as follows:
def gradient_descent(x,b,y,m,alpha):
for i in range (iteration):
h = hypothesis(x,b,y)
#product = np.sum(h.dot(x))
xvalue = x[:,1]
product = h.dot(xvalue)
hsum = np.sum(h)
b = b - ((alpha/m)* np.array([hsum , product]) )
costlist.append(cost(x,b,y,m))
return b,cost(x,b,y,m)
There's possibly another issue besides this as it doesn't converge to your answer. You should make sure you are using the same alpha also.

Categories