Can someone suggest how I can call the leave-out entry for model prediction? The model is fitted on all entries except the leave-out entry, and now I want to check the prediction error for that entry using the fitted model.
Sample code is below:
import pandas as pd              # reading tables
import numpy as np               # processing arrays
import scipy.stats               # computing statistics
import matplotlib.pyplot as plt  # drawing graphs
import statsmodels.api as sm     # statistical models

n = len(data)
a = data["aa"]
b = data["bb"]
MSE_predict = np.zeros(n)
for i in np.arange(n):
    a_leaveOne = np.delete(a.values, i)
    b_leaveOne = np.delete(b.values, i)
    b_leaveOne = sm.add_constant(b_leaveOne)
    model = sm.OLS(a_leaveOne, b_leaveOne).fit()
    a_pre = model.predict(np.array([[1, b.values[i]]]))  # predict the held-out entry (constant first)
    MSE_predict[i] = np.square(a.values[i] - a_pre[0])   # squared error for the held-out entry
    print(MSE_predict[i])
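The overall leave-one-out error is then just the average of the per-entry squared errors, a minimal follow-up to the loop above:
print(MSE_predict.mean())  # average squared error across all held-out entries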
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
from pymc3.math import dot, exp
import pandas as pd
# the model `m` (defined above with pm.Data containers "condition1", "condition2", "condition3") is omitted here
with m:
    trace = pm.sample(
        10000,
        chains=4,
        tune=400,
        return_inferencedata=True,
    )
summary = az.summary(trace, hdi_prob=0.95)
print(summary)
with m:
    pm.set_data({"condition1": [val1], "condition2": [val2], "condition3": [val3]})
    ppc = pm.sample_posterior_predictive(trace)
In the above code I have the values of the three conditions available, and I know the observed output as well; I want to calculate the probability of arriving at that output.
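One way to turn the posterior predictive samples into such a probability is a density estimate at the observed value. A minimal sketch, assuming the observed variable in the model is named "output" and y_obs holds the known output (both names are hypothetical):
import numpy as np
from scipy.stats import gaussian_kde

y_obs = 1.23                               # placeholder for the known output value
draws = np.asarray(ppc["output"]).ravel()  # posterior predictive draws; the key "output" is an assumption
kde = gaussian_kde(draws)                  # smooth density estimate of the predictive distribution
print(kde(y_obs)[0])                       # predictive density at the observed output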
I estimate a VAR(1) model in statsmodels (the sample code is from the statsmodels user guide).
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from statsmodels.tsa.base.datetools import dates_from_str
# prepare the data
mdata = sm.datasets.macrodata.load_pandas().data
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)
mdata = mdata[['realgdp','realcons','realinv']]
mdata.index = pd.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()
# make a VAR model
model = VAR(data)
results = model.fit(1)
I want to compute the variance of the VAR model. Is there an attribute or property of the VARResults object that can give the variance directly?
I have found the answer.
results.acf(0)
The acf() method of the VARResults object computes the theoretical autocovariance function of the VAR process; the lag-0 autocovariance is the variance (covariance matrix) of the process.
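For reference, a minimal check against the fit above (assuming acf(0) returns the autocovariances for lags 0 through 0, so its first element is the lag-0 covariance matrix):
cov = results.acf(0)[0]  # lag-0 autocovariance: the covariance matrix of the VAR(1) process
print(np.diag(cov))      # the variances of the individual series sit on the diagonal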
I was asked to write a program for Linear Regression with the following steps.
Load the R data set mtcars as a pandas dataframe.
Build another linear regression model by considering the log of independent variable wt, and log of dependent variable mpg.
Fit the model with data, and display the R-squared value
I tried the following two models and the tests are not passing. Is there an issue with my code?
#case 1
import statsmodels.api as sm
import numpy as np
mtcars = sm.datasets.get_rdataset('mtcars')
mtcars_data = mtcars.data
liner_model = sm.formula.ols('np.log(wt) ~ np.log(mpg)',mtcars_data)
liner_result = liner_model.fit()
print(liner_result.rsquared)
#case 2
import statsmodels.api as sa
import statsmodels.formula.api as sfa
import numpy as np
import pandas as pd
mtcars = sa.datasets.get_rdataset('mtcars')
cars_data = mtcars.data
lin_mod2 = pd.DataFrame(cars_data)
lin_mod2['wt'] = np.log(lin_mod2['wt'])
lin_mod2['mpg'] = np.log(lin_mod2['mpg'])
lin_mod1 = sfa.ols("wt~mpg",lin_mod2)
print(lin_mod1.fit().rsquared)
#or
import statsmodels.api as sm
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
mtcars = sm.datasets.get_rdataset('mtcars','datasets',cache=True).data
df = pd.DataFrame(mtcars)
model = smf.ols(formula='np.log(wt) ~ np.log(mpg)', data=df).fit()
r = model.rsquared
print(r)
Well, it passed; the question was wrong, I think. All I had to do was print
model.summary()
not
model.rsquared
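Incidentally, the task statement makes mpg the dependent variable and wt the independent one, so read literally the formula would be the reverse of case 1. For a single-predictor regression the R-squared is the same either way (it equals the squared correlation), which is why the direction did not affect the value. A sketch with the literal orientation:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np

mtcars_data = sm.datasets.get_rdataset('mtcars').data
model = smf.ols('np.log(mpg) ~ np.log(wt)', data=mtcars_data).fit()  # mpg as the dependent variable
print(model.rsquared)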
I need to open this TensorBoard graph in Python or R.
You could use summary_iterator to read the event file generated by TensorFlow and make_ndarray to extract the tensor values from it.
Setup
import tensorflow as tf
import numpy as np

TAG_NAME = "classification_loss"
writer_train = tf.summary.create_file_writer("train")
for epoch in range(200):
    data = np.exp(-(epoch/30)) + 0.1*np.random.random()
    with writer_train.as_default():
        tf.summary.scalar(name=TAG_NAME, data=data, step=epoch)
# output folder: train/events.out.tfevents.1618103384.eafddbceac44.283.2075165.v2
Read event file
import tensorflow as tf
import matplotlib.pyplot as plt

TAG_NAME = "classification_loss"
tf_event_file = "train/events.out.tfevents.1618103384.eafddbceac44.283.2075165.v2"
value_list = []
for e in tf.compat.v1.train.summary_iterator(tf_event_file):
    for v in e.summary.value:
        if v.tag == TAG_NAME:
            value = tf.make_ndarray(v.tensor)
            value_list.append(value)
plt.plot(value_list)
plt.grid()
plt.show()
Alternatively, the easiest way is to download the graph data in CSV or JSON format from TensorBoard. Once you have the data, you can import it and plot it, as in the sketch below.
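For the CSV route, the scalar download from TensorBoard typically has Wall time, Step, and Value columns; a minimal sketch (the file name is hypothetical):
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("run-train-tag-classification_loss.csv")  # hypothetical name of the downloaded file
plt.plot(df["Step"], df["Value"])
plt.grid()
plt.show()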
import tensorflow as tf
import matplotlib.pyplot as plt

def get_scalar_run_tensorboard(tag, filepath):
    values, steps = [], []
    for e in tf.compat.v1.train.summary_iterator(filepath):
        if len(e.summary.value) > 0:  # skip the first, empty element
            if e.summary.value[0].tag == tag:
                tensor = e.summary.value[0].tensor
                value, step = tf.io.decode_raw(tensor.tensor_content, tf.float32)[0].numpy(), e.step
                values.append(value)
                steps.append(step)
    return values, steps
e.g.
val, st = get_scalar_run_tensorboard("epoch_loss",
                                     "./logs/your_log/your_run/train/your_file.v2")
plt.plot(st, val)
In this code I have produced a dataset using a Gaussian distribution and then tried to apply stochastic gradient descent. In each iteration I update the theta array, but it is not getting updated: it remains zero after every iteration. The gradient is non-zero, yet theta still does not change. Please help.
import numpy as np   # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import math
import random
import matplotlib.pyplot as plt

# generating random samples
theta = np.array([3, 1, 2])
X = []
E = []
Y = []
a1 = 3
v1 = 4
a2 = -1
v2 = 4
v3 = 2
for i in range(0, 1000000):
    x1 = (1/math.sqrt(2*3.14*v1)) * math.exp(-(random.random()-a1)**2/(2*v1))
    x2 = (1/math.sqrt(2*3.14*v2)) * math.exp(-(random.random()-a2)**2/(2*v2))
    X.append([x1, x2])
    e = (1/math.sqrt(2*3.14*v3)) * math.exp(-(random.random())**2/(2*v3))
    y = theta[0] + theta[1]*x1 + theta[2]*x2 + e
    Y.append(y)
    E.append(e)

# now applying stochastic gradient
## Batch_Size = 1
r = 1
learning_rate = 0.001
theta = np.array([0, 0, 0])
theta = theta.reshape(3, 1)
X = pd.DataFrame(X, columns=['X1', 'X2'])
Y = pd.DataFrame(Y, columns=['Y'])
Y.head()

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=40, random_state=50)
X_train['X0'] = np.ones(len(X_train))
y_train.head()

def gradient_descent(x, y, theta, lr):
    m = len(y)
    prediction = (x.dot(theta)).to_numpy()
    gradient = prediction - y.to_numpy()
    current_cost = (1/2*m)*np.sum(np.square(prediction - y.to_numpy()))
    return gradient, current_cost

n_iterations = 1000
theta_history = []
cost_history = []
for i in range(0, n_iterations):
    xi = X_train.sample(r)
    yi = y_train.sample(r)
    m = len(xi)
    gradient, current_cost = gradient_descent(xi, yi, theta, learning_rate)
    theta[0] = theta[0] - learning_rate*((1/m)*np.sum(np.multiply(gradient, xi['X1'].to_numpy().reshape(m, 1))))
    theta[1] = theta[1] - learning_rate*((1/m)*np.sum(np.multiply(gradient, xi['X2'].to_numpy().reshape(m, 1))))
    theta[2] = theta[2] - learning_rate*((1/m)*np.sum(np.multiply(gradient, xi['X0'].to_numpy().reshape(m, 1))))
    print("theta=", theta)
    theta_history.append(theta)
    cost_history.append(current_cost)
    if prev_index >= len(X_train):
        break
You reassigned theta to an integer array of zeros: theta=np.array([0,0,0]). Since no dtype is given, NumPy makes it an int array, and every in-place update such as theta[0] = theta[0] - learning_rate*(...) is cast back to int, truncating the small float step to 0. That is why theta stays zero even though the gradient is non-zero.
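A minimal fix is to create theta with float dtype so the in-place updates are not truncated:
theta = np.zeros((3, 1))  # float64 by default; theta[0] = theta[0] - ... now keeps fractional values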