I am a python user new to R. Right now I am dealing with the R package GWmodel.
Looking at the function for the basic GWR, this looks like:
gwr.res <- gwr.basic(GenEl2004 ~ DiffAdd + LARent + SC1 + Unempl + LowEduc + Age18_24 + Age25_44 + Age45_64, data = Dub.voter, bw = 100, kernel = "bisquare", adaptive = TRUE, F123.test = TRUE)
What I need is to collet the mean of the estimate parameters of each variable and append it in a list for any given value of bw (bandwidth).
in python terms this would be like:
LARentMean = []
SC1Mean = []
UnenmplMean = []
LowEducMean = []
Age18_24Mean = []
Age25_44Mean = []
Age45_64Mean = []
for i in range (20,400):
gwrres = gwr.basic(GenEl2004 ~ DiffAdd + LARent + SC1 + Unempl + LowEduc + Age18_24 + Age25_44 + Age45_64, data = Dub.voter, bw = i, kernel = "bisquare", adaptive = TRUE, F123.test = TRUE)
a = gwrres(LARent).mean() #a <- mean(gwrres$SDF$LARent)
b = gwrres(SC1).mean() #b <- mean(gwrres$SDF$SC1)
c = gwrres(Unenmpl).mean() #c <- mean(gwrres$SDF$Unempl)
d = gwrres(lowEduc).mean() #d <- mean(gwrres$SDF$LowEduc)
e = gwrres(Age18_24).mean() #e <- mean(gwrres$SDF$Age18_24)
f = gwrres(Age25_44).mean() #f <- mean(gwrres$SDF$Age25_44)
g = gwrres(Age45_64).mean() #g <- mean(gwrres$SDF$Age45_64)
LARentMean.append(a)
SC1Mean.append(b)
UnenmplMean.append(c)
LowEducMean.append(d)
Age18_24Mean.append(e)
Age25_44Mean.append(f)
Age45_64Mean.append(g)
You can use lapply which will loop to a list, e.g.:
l = lapply(20:400, function(i){
gwr.basic(GenEl2004 ~ DiffAdd + LARent + SC1 + Unempl + LowEduc +
Age18_24 + Age25_44 + Age45_64, data=Dub.voter, bw=i,
kernel="bisquare", adaptive=T, F123.test=T)
})
I have no idea what gwr.basic generates as output, so you may want an extra line to only take the mean (if it puts out more information).
Related
This is my first time using Python.
I am having trouble passing from R to Python in the Nlopt package
So I am using a Maximum Likelihood Estimator to estimate 4 parameters. In R I programmed two functions: loglikelihood and gradient from the log-likelihood
In R both of my functions are like this:
loglik <- function(par){
g_h <- par[1]; g_c <- par[2]; a_bar <- par[3]; sigma_e <- par[4]
d <- mutate(obs_data,
num = h + ((I-g_c)/w),
den = g_h + ((I-g_c)/w),
eps = (num/den)-a_bar,
arg_1 = 0.5*(eps/sigma_e)^2,
arg_2 = log(abs(den)),
opt = log(sigma_e * sqrt(2*pi)) + arg_1 + arg_2)
LL <- - sum(d$opt)
return(-LL)
}
grad_loglik <- function(par){
g_h <- par[1]; g_c <- par[2]; a_bar <- par[3]; sigma_e <- par[4]
d <- obs_data %>%
mutate(num = h + ((I- g_c)/w),
den = g_h + ((I-g_c)/w),
eps = num/den - a_bar,
eps_gh = -num/(den^2),
eps_gc = (h-g_h)/(w*(den^2)),
ll_gh = -(eps/(sigma_e^2))*eps_gh - 1/den,
ll_gc = -(eps/(sigma_e^2))*eps_gc + 1/(w*den),
ll_abar = eps/(sigma_e^2),
ll_se = -1/sigma_e + (eps^2)/(sigma_e^3))
return(c(-sum(d$ll_gh),
-sum(d$ll_gc),
-sum(d$ll_abar),
-sum(d$ll_se)))
}
So I am trying to minimize the loglik function
The code for using the Nlopt function
opt_parr <- nloptr(x0 = val_i,
eval_f = loglik,
eval_grad_f = grad_loglik,
lb = c(0,0,0,0),
ub = c(24, 100, 1, 1),
opts = list("algorithm" = "NLOPT_LD_LBFGS",
"xtol_rel" = xtol,
"maxeval"= maxev,
"print_level" = 0))
So I translated both of my functions in python:
def loglik(par):
g_h = par[0]; g_c = par[1]; a_bar = par[2]; sigma_e = par[3]
d = obs_data
d = d.assign(num = lambda x: d.h + ((d.I - g_c)/d.w),
den = lambda x: g_h + ((d.I - g_c)/d.w),
eps = lambda x: (d.num/d.den) - a_bar,
arg1 = lambda x: 0.5*(d.eps/sigma_e)**2,
arg2 = lambda x: np.log(np.absolute(d.den)),
opt = lambda x: np.log(sigma_e * np.sqrt(2*np.pi)) + d.arg1 + d.arg2)
LL = -sum(d.opt)
return(-LL)
def grad_loglik(par):
g_h = par[0]; g_c = par[1]; a_bar = par[2]; sigma_e = par[3]
d = obs_data
d = d.assign(num = lambda x: d.h + ((d.I - g_c)/d.w),
den = lambda x: g_h + ((d.I - g_c)/d.w),
eps = lambda x: (d.num/d.den) - a_bar,
eps_gh = lambda x: -d.num/(d.den**2),
eps_gc = lambda x: (d.h-g_h)/(d.w*(d.den**2)),
ll_gh = lambda x: -(d.eps/(sigma_e**2))*d.eps_gh - 1/d.den,
ll_gc = lambda x: -(d.eps/(sigma_e**2))*d.eps_gc + 1/(d.w*d.den),
ll_abar = lambda x: d.eps/(sigma_e**2),
ll_se = lambda x: -1/sigma_e + (d.eps**2)/(sigma_e**3))
G1 = -sum(d.ll_gh)
G2 = -sum(d.ll_gc)
G3 = -sum(d.ll_abar)
G4 = -sum(d.ll_se)
G = [G1, G2, G3, G4]
return(G)
But I don't understand how to program the optimizer. So far this is my best try:
#%% Find optimal parameters
opt = nlopt.opt(nlopt.LD_LBFGS, 4)
opt.set_lower_bounds([0]*4)
opt.set_upper_bounds([24, 100, 1, 1])
opt.set_min_objective(loglik)
opt.set_xtol_rel(1e-64)
x = opt.optimize([1e-4]*4)
minf = opt.last_optimum_value()
print("optimum at ", x[0], x[1], x[2], x[3])
print("minimum value = ", minf)
print("result code = ", opt.last_optimize_result())
I don't know where to put the gradient function in order to make it work, in R was kinda clear.
But this page tell me that:
But since I am new in Python this doesn't tell me much info. Am I wrongly programming the gradient function? Where does it has to be?
Thanks in advance!
Data
Just use obs_data
import numpy as np
import pandas as pd
import nlopt
N = 100_000
np.random.seed(1)
wage = np.exp(np.random.normal(loc = 4, scale = 0.1, size = N))
nlincome = np.exp(np.random.normal(loc = 3, scale = 0.5, size = N))
eps_ = np.random.normal(loc = 0, scale = 0.01, size = N)
data = pd.DataFrame({'wages':wage, 'non_labor_income': nlincome,
'epsilon': eps_})
data = data.assign(alpha_bar = lambda x: α_bar + data['epsilon'])
check = data.assign(h = lambda x: (data['alpha_bar']+ data['epsilon'])*γ_h
- (((1-data['alpha_bar']-data['epsilon'])*
(data['non_labor_income']-γ_c))/(data['wages'])))
check = check.assign(l = lambda x: time - check.h,
c = lambda x: (check.wages*check.h)+check.non_labor_income,
total_income = lambda x: check.wages*check.h)
obs_data = check[['wages', 'h', 'non_labor_income']]
obs_data = obs_data.rename(columns = {"wages":"w", "non_labor_income":"i"})
I am having issues converting this code from VBA to python, it's a function that needs to be converted to python instead of VBA
Function NC(SPL, pond) As Single
Dim A As Single
Dim B As Single
Dim I As Integer
Dim SPL1(8) As Single
B = 0
If pond = "A" Then
SPL1(1) = SPL(1) + 26.2228
SPL1(2) = SPL(2) + 16.1897
SPL1(3) = SPL(3) + 8.6748
SPL1(4) = SPL(4) + 3.2478
SPL1(5) = SPL(5)
SPL1(6) = SPL(6) - 1.2017
SPL1(7) = SPL(7) - 0.9636
SPL1(8) = SPL(8) + 1.1469
Else
SPL1(1) = SPL(1)
SPL1(2) = SPL(2)
SPL1(3) = SPL(3)
SPL1(4) = SPL(4)
SPL1(5) = SPL(5)
SPL1(6) = SPL(6)
SPL1(7) = SPL(7)
SPL1(8) = SPL(8)
End If
For I = 1 To 8
If I = 1 Then
A = 1.5215 * SPL1(1) - 57.029
ElseIf I = 2 Then
A = 1.2855 * SPL1(2) - 31.628
ElseIf I = 3 Then
A = 1.1853 * SPL1(3) - 18.938
ElseIf I = 4 Then
A = 1.0888 * SPL1(4) - 8.5807
ElseIf I = 5 Then
A = 1.019 * SPL1(5) - 2.0793
ElseIf I = 6 Then
A = 0.9922 * SPL1(6) + 1.2421
ElseIf I = 7 Then
A = 0.9738 * SPL1(7) + 3.2226
ElseIf I = 8 Then
A = 0.9738 * SPL1(8) + 4.1964
End If
If A > B Then
B = A
End If
Next I
NC = Int(Round(B + 0.5))
End Function
This is what I have so far in python but it is giving me an errorcode error in python the error says it's an indexing issue in python I would like to solve that error so that the function works but not sure how exactly to solve it
def NC(SPL,pond):
SPL1 =[(0.0 for x in range(0,8))]
B = 0.0
if pond == 'A':
SPL1[0] = SPL[0] + 26.228
SPL1[1] = SPL[1] + 16.1897
SPL1[2] = SPL[2] + 8.6748
SPL1[3] = SPL[3] + 3.2478
SPL1[4] = SPL[4]
SPL1[5] = SPL[5] - 1.2017
SPL1[6] = SPL[6] - 0.9636
SPL1[7] = SPL[7] + 1.1469
else:
SPL1[0] = SPL[0]
SPL1[1] = SPL[1]
SPL1[2] = SPL[2]
SPL1[3] = SPL[3]
SPL1[4] = SPL[4]
SPL1[5] = SPL[5]
SPL1[6] = SPL[6]
SPL1[7] = SPL[7]
for i in range(0, 8):
if i == 0:
A = 1.5215 * SPL1[0] -57.029
if i == 1:
A = 1.2855 * SPL1[1] -31.628
if i == 2:
A = 1.1853 * SPL1[2] -18.938
if i == 3:
A = 1.0888 * SPL1[3] -8.5807
if i == 4:
A = 1.019 * SPL1[4] -2.0793
if i == 5:
A = 0.9922 * SPL1[5] +1.2421
if i == 6:
A = 0.9738 * SPL1[6] +3.2226
if i == 7:
A = 0.9738 * SPL1[7] +4.1964
if A > B:
B = A
return int(B+0.5)
Python is dynamically typed and 0 base indexed. Also by convention lower-case variable names are used. So your code would be something along the lines of:
def nc(spl, pond):
spl1 =[0.0 for _ in range(8))]
b = 0.0
if pond == 'A':
spl1[0] = spl[0] + 26.228
#and so on
else:
spl1[0] = spl[0]
# and so on
for i in range(0, 8):
if i == 0:
a = 1.5215 * spl1[0] -57.029
# and so on
if a > b:
b = a
return int(b+0.5)
I am trying to get buy and sell orders from binance api(python-binance) which has a limit of 500 values.(500 ask,500 buy). I already can get this with creating 500 variables with index numbers but it seems to me there has to be a better way than to write 500 lines of code.
This is the code I am trying to make it happen.
#!/usr/bin/python
# -*- coding: utf-8 -*-
from binance.client import Client
user_key = ''
secret_key = ''
binance_client = Client(user_key, secret_key)
while True:
alis = binance_client.futures_order_book(symbol='XRPUSDT')
binance_buy = alis['bids'][0]
binance_buy1 = alis['bids'][1]
binance_buy2 = alis['bids'][2]
binance_buy3 = alis['bids'][3]
binance_buy4 = alis['bids'][4]
binance_buy5 = alis['bids'][5]
binance_buy6 = alis['bids'][6]
binance_buy7 = alis['bids'][7]
binance_buy8 = alis['bids'][8]
binance_buy9 = alis['bids'][9]
binance_buy10 = alis['bids'][10]
binance_buy11 = alis['bids'][11]
binance_buy12 = alis['bids'][12]
binance_buy13 = alis['bids'][13]
binance_buy14 = alis['bids'][14]
binance_buy15 = alis['bids'][15]
binance_buy16 = alis['bids'][16]
binance_buy17 = alis['bids'][17]
binance_buy18 = alis['bids'][18]
binance_buy19 = alis['bids'][19]
binance_buy20 = alis['bids'][20]
binance_sell = alis['asks'][0]
binance_sell1 = alis['asks'][1]
binance_sell2 = alis['asks'][2]
binance_sell3 = alis['asks'][3]
binance_sell4 = alis['asks'][4]
binance_sell5 = alis['asks'][5]
binance_sell6 = alis['asks'][6]
binance_sell7 = alis['asks'][7]
binance_sell8 = alis['asks'][8]
binance_sell9 = alis['asks'][9]
binance_sell10 = alis['asks'][10]
binance_sell11 = alis['asks'][11]
binance_sell12 = alis['asks'][12]
binance_sell13 = alis['asks'][13]
binance_sell14 = alis['asks'][14]
binance_sell15 = alis['asks'][15]
binance_sell16 = alis['asks'][16]
binance_sell17 = alis['asks'][17]
binance_sell18 = alis['asks'][18]
binance_sell19 = alis['asks'][19]
binance_sell20 = alis['asks'][20]
binance_buy_demand = float(binance_buy[1]) + float(binance_buy1[1]) \
+ float(binance_buy2[1]) + float(binance_buy3[1]) \
+ float(binance_buy4[1]) + float(binance_buy5[1]) \
+ float(binance_buy6[1]) + float(binance_buy7[1]) \
+ float(binance_buy8[1]) + float(binance_buy9[1]) \
+ float(binance_buy10[1]) + float(binance_buy11[1]) \
+ float(binance_buy12[1]) + float(binance_buy13[1]) \
+ float(binance_buy14[1]) + float(binance_buy15[1]) \
+ float(binance_buy16[1]) + float(binance_buy17[1]) \
+ float(binance_buy18[1]) + float(binance_buy19[1]) \
+ float(binance_buy20[1])
for i in range(0, 500):
print (alis['asks'][i][0], alis['asks'][i][1])
binance_sell_demand = float(binance_sell[1]) \
+ float(binance_sell1[1]) + float(binance_sell2[1]) \
+ float(binance_sell3[1]) + float(binance_sell4[1]) \
+ float(binance_sell5[1]) + float(binance_sell6[1]) \
+ float(binance_sell7[1]) + float(binance_sell8[1]) \
+ float(binance_sell9[1]) + float(binance_sell10[1]) \
+ float(binance_sell11[1]) + float(binance_sell12[1]) \
+ float(binance_sell13[1]) + float(binance_sell14[1]) \
+ float(binance_sell15[1]) + float(binance_sell16[1]) \
+ float(binance_sell17[1]) + float(binance_sell18[1]) \
+ float(binance_sell19[1]) + float(binance_sell20[1])
there is 500 bids and 500 asks
after getting data I sum bids ands asks like this
I tried to make for loop but only could print this values cant sum it in for loop this is the code I tried:
sample output:
0.9315 18328.6
0.9316 18201.2
0.9317 23544.0
0.9318 260.4
0.9319 689.5
0.9320 20410.5
0.9321 47.7
0.9322 294.2
0.9323 446.6
0.9324 104.0
0.9325 3802.3
0.9326 100.1
0.9327 20122.9
0.9328 1410.0
0.9329 7745.1
0.9330 9094.4
0.9331 10389.9
0.9332 248.5
0.9333 71559.7
0.9334 18024.1
0.9335 7404.5
0.9336 1366.6
0.9337 21972.4
0.9338 1224.8
0.9339 49.9
0.9340 17590.5
0.9341 17967.1
0.9342 272.3
0.9343 704.4
0.9344 3581.7
0.9345 3896.8
the first items are price second is quantity
I am trying to make a function that sum and divide avg.price and also want to know how many bids total and asks total.
Use the sum() function with a generator that gets the appropriate element of each list item.
binance_buy_demand = sum(float(x[1]) for x in alis['bids'])
binance_sell_demand = sum(float(x[1]) for x in alis['asks'])
You can sum in a for loop like this:
total_ask_volume = 0
total_bid_volume = 0
for i in range(0, 500):
total_ask_volume += float(alis["asks"][i][1])
total_bid_volume += float(alis["bids"][i][1])
print(total_ask_volume, total_bid_volume)
Another option is to skip the i index, and go through the values directly:
total_ask_volume = 0
for ask in alis["asks"]:
total_ask_volume += float(ask[1])
total_bid_volume = 0
for bid in alis["bids"]:
total_bid_volume += float(bid[1])
print(total_ask_volume, total_bid_volume)
This can sometimes be clearer, particularly in situations where you'd otherwise have multiple indexes (i, j, k and so on) which could get confusing.
As a side note, float often rounds in unintuitive ways; it probably doesn't matter in this particular circumstance, but in most situations involving money you probably want to use Decimal instead (or multiply by 100 and count whole numbers of cents).
I have a dictionary with multiple key defined as (arbitrary inputs):
colors = {}
colors['red'] = {}
colors['blue'] = {}
colors['red'][clustname] = np.array([])
colors['blue'][clustname] = np.array([])
basically I want to plot a red v blue graph for each 'cluster'. I have 13 'clusters' in total with differing color values for each. The names in my code are different from the arbitrary ones above, but I figured it would be easier to understand with basic values then to look at the overall code:
colpath = '/home/jacob/PHOTOMETRY/RESTFRAME_COLOURS/' #This is the path to the restframe colors
goodcolindx = {}
colfiledat = {}
colors = {}
colors['UMINV'] = {}
colors['VMINJ'] = {}
colors['NUVMINV'] = {}
colors['id'] = {}
for iclust in range(len(clustname)):
colors['UMINV'][clustname[iclust]] = np.array([])
colors['VMINJ'][clustname[iclust]] = np.array([])
colors['id'][clustname[iclust]] = np.array([])
colors['NUVMINV'][clustname[iclust]] = np.array([])
filepath = catpath + clustname[iclust] + "_totalall_" + extname[iclust] + ".cat"
photdat[clustname[iclust]] = ascii.read(filepath)
filepath = zpath + "compilation_" + clustname[iclust] + ".dat"
zdat[clustname[iclust]] = ascii.read(filepath)
colfilepath = colpath + 'RESTFRAME_MASTER_' + clustname[iclust] + '_indivredshifts.cat'
colfiledat[clustname[iclust]] = ascii.read(colfilepath)
goodcolindx[clustname[iclust]] = np.where((colfiledat[clustname[iclust]]['REDSHIFTUSED'] > 0.9) & \
(colfiledat[clustname[iclust]]['REDSHIFTUSED'] < 1.5) & \
(photdat[clustname[iclust]]['totmask'] == 0) & \
(photdat[clustname[iclust]]['K_flag'] == 0) & \
((zdat[clustname[iclust]]['quality'] == 3) | (zdat[clustname[iclust]]['quality'] == 4)))
goodcolindx[clustname[iclust]] = goodcolindx[clustname[iclust]][0]
for igood in range(len(goodcolindx[clustname[iclust]])):
idstring = str(photdat[clustname[iclust]]['id'][goodcolindx[clustname[iclust]][igood]])
colors['NUVMINV'][clustname[iclust]] = np.append(colors['NUVMINV'][clustname[iclust]], -2.5 *
np.log10(colfiledat[clustname[iclust]]['NUV'][goodcolindx[clustname[iclust]][igood]]
/ colfiledat[clustname[iclust]]['V'][goodcolindx[clustname[iclust]][igood]]))'SpARCS-0035'
colors['UMINV'][clustname[iclust]] = np.append(colors['UMINV'][clustname[iclust]], colfiledat[clustname[iclust]]['UMINV'][goodcolindx[clustname[iclust]][igood]])
colors['id'][clustname[iclust]] = np.append(colors['id'][clustname[iclust]], photdat[clustname[iclust]]['id'][goodcolindx[clustname[iclust]][igood]])
colors['VMINJ'][clustname[iclust]] = np.append(colors['VMINJ'][clustname[iclust]], colfiledat[clustname[iclust]]['VMINJ'][goodcolindx[clustname[iclust]][igood]])
for iclustc in colors:
plt.plot(colors['VMINJ'][clustname[iclustc]], colors['UMINV'][clustname[iclustc]], 'ko')
plt.show()
So in this case, my 'red' is the VMINJ and my 'blue' is the UMINV. I am trying to use a for loop to cycle through all the cluster names that I have, but I keep getting the error back 'String indices must be integers'. I understand the basics of that, but don't know how to fix my code to make plots for each 'red' v 'blue' for each cluster. Any help would be awesome, let me know if you have questions
I figured it out. I changed the for loop to:
for iclust in range(len(clustname)):
plt.plot(colors['UMINV'][clustname[iclust]]....
and that worked
I've set up numpy.seterr as follows:
np.seterr(invalid='raise', over ='raise', under='raise')
And I'm getting the following error:
c = beta[j,i] + oneminusbeta[j,i]
FloatingPointError: overflow encountered in double_scalars
I've checked what beta[j,i] and oneminusbeta[j,i] are at the point of crash, and these are their values:
beta: -131.340389182
oneminusbeta: 0.0
Please note, this line of addition (beta[j,i] + oneminusbeta[j,i]) has run for thousands of lines in a loop (that performs image classification) before crashing here at this point.
How can I deal with this? Is it necessary to change the type of the numpy arrays?
This is how I've initialized them:
beta = np.empty([m,n])
oneminusbeta = np.empty([m,n])
Is it possible to cast the individual values before adding them up? Rather than changing the entire array declarations? Or is this even a serious issue? Would it be safe to simply turn off the numpy.seterr configuration and let the calculations go ahead without raising the error?
Edit
Someone suggested below, and I suspected as well, that the values being added shouldn't cause an overflow. Then how can I find out where the overflow is really happening?
This is my code:
epthreshold = 709
enthreshold = -708
f.write("weights["+str(i)+", " + str(j)+"] = math.exp(beta: " +str(beta[j,i])+ " + oneminusbeta: " + str(oneminusbeta[j,i])+")\n" )
c = beta[j,i] + oneminusbeta[j,i]
weights[i,j] = math.exp(np.clip(c, enthreshold, epthreshold))
And when I check my log file, this is the line I get:
weights[5550, 13] = math.exp(beta: -131.340389182 + oneminusbeta: 0.0)
Edit 2
Here's the rest of my code, where variables n,m and H have already been initialized to integer values:
import numba
import numpy as np
import statsmodels.api as sm
weights = np.empty([n,m])
for curr_n in range(n):
for curr_m in range(m):
weights[curr_n,curr_m] = 1.0/(n)
beta = np.empty([m,n])
oneminusbeta = np.empty([m,n])
for curr_class in range(m):
for curr_sample in range(n):
beta[curr_class,curr_sample] = 1./m
epthreshold = 709 # positive exponential threshold
enthreshold = -708
for h in range(H):
print "Boosting round %d ... " % h
z = np.empty([n,m])
for j in range(m): # computing working responses and weights, Step 2(a)(i)
for i in range(no_samples):
i_class = y[i] #get the correct class for the current sample
if h == 0:
z[i,j] = (int(j==i_class) - beta[j,i])/((beta[j,i])*(1. - beta[j,i]))
weights[i,j] = beta[j,i]*(1. - beta[j,i])
else:
if j == i_class:
z[i,j] = math.exp(np.clip(-beta[j,i],enthreshold, epthreshold))
else:
z[i,j] = -math.exp(np.clip(oneminusbeta[j,i], enthreshold, epthreshold))
f.write("weights["+str(i)+", " + str(j)+"] = math.exp(beta: " +str(beta[j,i])+ " + oneminusbeta: " + str(oneminusbeta[j,i])+")\n" )
c = beta[j,i] + oneminusbeta[j,i]
weights[i,j] = math.exp(np.clip(c, enthreshold, epthreshold))
g_h = np.zeros([1,1])
j = 0
# Calculating regression coefficients per class
# building the parameters per j class
for y1_w in zip(z.T, weights.T):
y1, w = y1_w
temp_g = sm.WLS(y1, X, w).fit() # Step 2(a)(ii)
if np.allclose(g_h,0):
g_h = temp_g.params
else:
g_h = np.c_[g_h, temp_g.params]
j = j + 1
if np.allclose(g,0):
g = g_h
else:
g = g + g_h # Step(2)(a)(iii)
# now set g(x), function coefficients according to new formula, step (2)(b)
sum_g = g.sum(axis=1)
for j in range(m):
diff = (g[:,j] - ((1./m) * sum_g))
g[:,j] = ((m-1.)/m) * diff
g_per_round[h,:,j] = g[:,j]
#Now computing beta, Step 2(c)...."
Q = 0.
e = 0.
for j in range(m):
# Calculating beta and oneminusbeta for class j
aj = 0.0
for i in range(no_samples):
i_class = y[i]
X1 = X[i].reshape(1, no_features)
g1 = g[:,j].reshape(no_features, 1)
gc = g[:,i_class].reshape(no_features, 1)
dot = 1. + float(np.dot(X1, g1)) - float(np.dot(X1,gc))
aj = dot
sum_e = 0.
a_q = []
a_q.append(0.)
for j2 in range(m): # calculating sum of e's except for all j except where j=i_class
if j2 != i_class: # g based on j2, not necessarily g1?
g2 = g[:,j2].reshape(no_features, 1)
dot1 = 1. + float(np.dot(X1, g2)) - float(np.dot(X1,gc))
e2 = math.exp(np.clip(dot1,enthreshold, epthreshold))
sum_e = sum_e + e2
a_q.append(dot1)
if (int(j==i_class) == 1):
a_q_arr = np.array(a_q)
alpha = np.array(a_q_arr[1:])
Q = mylogsumexp(f,a_q_arr, 1, 0)
sumalpha = mylogsumexp(f,alpha, 1, 0)
beta[j,i] = -Q
oneminusbeta[j,i] = sumalpha - Q
else:
alpha = a_q
alpha = np.array(alpha[1:])
a_q_arr = np.array(a_q)
Q = mylogsumexp(f,a_q_arr, 0, aj)
sumalpha = log(math.exp(np.clip(Q, enthreshold, epthreshold)) - math.exp(np.clip(aj, enthreshold, epthreshold)))
beta[j,i] = aj - Q
oneminusbeta[j,i] = sumalpha - Q
and the function mylogsumexp is:
def mylogsumexp(f, a, is_class, maxaj, axis=None, b=None):
np.seterr(over="raise", under="raise", invalid="raise")
threshold = -sys.float_info.max
maxthreshold = sys.float_info.max
epthreshold = 709 # positive exponential threshold
enthreshold = -708
a = asarray(a)
if axis is None:
a = a.ravel()
else:
a = rollaxis(a, axis)
if is_class == 1:
a_max = a.max(axis=0)
else:
a_max = maxaj
#bnone = " none "
if b is not None:
a_max = maxaj
b = asarray(b)
if axis is None:
b = b.ravel()
else:
b = rollaxis(b, axis)
a = np.clip(a - a_max, enthreshold, epthreshold)
midout = np.sum(np.exp(a), axis=0)
midout = 1.0 + np.clip(midout - math.exp(a_max), threshold, maxthreshold)
out = np.log(midout)
else:
a = np.clip(a - a_max, enthreshold, epthreshold)
out = np.log(np.sum(np.exp(a)))
out += a_max
if out == float("inf"):
out = maxthreshold
if out == float("-inf"):
out = threshold
return out