Specifying linear regression in python

Specifying linear regression in python - python

I tried to implement robust standard errors to my linear regression, but received the following error:
LinAlgError: Singular matrix
Here is the code that raises the error:
#Get robust standard errors
Robust=sm.OLS(y,X, M=sm.robust.norms.HuberT()).fit()
Robust.summary()
qq = sm.qqplot(Robust.resid, line = 'r')
Robust2=sm.RLM(y,X, M=sm.robust.norms.HuberT()).fit(cov="H2")
Robust2.summary()
shapiro(residual)
pred_val2 = Robust2.fittedvalues.copy()
true_val = regression_data_performance['CRSP_return_p1'].values.copy()
residual2 = true_val - pred_val2
shapiro(residual2)
data=['Sales','up_revision','pos_EPS','filing_range','time','DebttoAssets','ShareOverhang','VentureBacked','CRSP_ret_SP500_prior30days',
'CRSP_vola_SP500_prior30days','top_tier_dummy','ProceedsAmtInThisMktMil','Age','MainSICCode_industry48_y_Soda',
'MainSICCode_industry48_y_PerSv','MainSICCode_industry48_y_Rtail','MainSICCode_industry48_y_Rubbr','MainSICCode_industry48_y_Ships',
'MainSICCode_industry48_y_Smoke','MainSICCode_industry48_y_Steel','MainSICCode_industry48_y_Toys','MainSICCode_industry48_y_Txtls','MainSICCode_industry48_y_Whlsl','CRSP_return_p1']
regression_data_performance=regression_data_performance[~isNaN(regression_data_performance['filing_range'])]
data=regression_data_performance[data]
data1=data.fillna(0)
data2 = preprocessing.scale(data1)
In case it helps you here is the complete error message:
LinAlgError Traceback (most recent call last)
<ipython-input-33-1febd940375c> in <module>
3 Robust.summary()
4 qq = sm.qqplot(Robust.resid, line = 'r')
----> 5 Robust2=sm.RLM(y,X, M=sm.robust.norms.HuberT()).fit(cov="H2")
6 Robust2.summary()
7 shapiro(residual)
~\anaconda3\lib\site-packages\statsmodels\robust\robust_linear_model.py in fit(self, maxiter, tol, scale_est, init, cov, update_scale, conv, start_params)
299 converged = _check_convergence(criterion, iteration, tol, maxiter)
300 results = RLMResults(self, wls_results.params,
--> 301 self.normalized_cov_params, self.scale)
302
303 history['iteration'] = iteration
~\anaconda3\lib\site-packages\statsmodels\robust\robust_linear_model.py in __init__(self, model, params, normalized_cov_params, scale)
410 self.data_in_cache = ['sresid']
411
--> 412 self.cov_params_default = self.bcov_scaled
413 # TODO: "pvals" should come from chisq on bse?
414
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\anaconda3\lib\site-packages\statsmodels\robust\robust_linear_model.py in bcov_scaled(self)
453 W = np.dot(model.M.psi_deriv(self.sresid) * model.exog.T,
454 model.exog)
--> 455 W_inv = np.linalg.inv(W)
456 # [W_jk]^-1 = [SUM(psi_deriv(Sr_i)*x_ij*x_jk)]^-1
457 # where Sr are the standardized residuals
<__array_function__ internals> in inv(*args, **kwargs)
~\anaconda3\lib\site-packages\numpy\linalg\linalg.py in inv(a)
545 signature = 'D->D' if isComplexType(t) else 'd->d'
546 extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
--> 547 ainv = _umath_linalg.inv(a, signature=signature, extobj=extobj)
548 return wrap(ainv.astype(result_t, copy=False))
549
~\anaconda3\lib\site-packages\numpy\linalg\linalg.py in _raise_linalgerror_singular(err, flag)
95
96 def _raise_linalgerror_singular(err, flag):
---> 97 raise LinAlgError("Singular matrix")
98
99 def _raise_linalgerror_nonposdef(err, flag):
LinAlgError: Singular matrix
Does anyone know how I can solve the error? If you need more information please let me know.

Related

Custom scaler transformer fit error only when used in ColumnTransformer pipeline

I have a df with some columns target time series which I need to scale as 1D array and as two thus defining custom wrapper to extend scikit-learn scalers with optional preprocessing as flatten. transform and inverse_transform are not posted for simplicity
import copy
class ScalerOptFlatten( TransformerMixin,
BaseEstimator ):
def __init__( self, instance, flatten , **kwargs):
#super().__init__(**kwargs)
self.scaler = instance
self.flatten = flatten
def get_params(self, deep=True):
cp = copy.copy(self.scaler)
cp.__class__ = type(self.scaler)
params = type(self.scaler).get_params(cp, deep)
return params
def fit( self, X, y = None ):
if self.flatten:
scale_in = X.reshape(-1).reshape(-1, 1)
else:
scale_in = X
if y==None:
self.scaler.fit(scale_in)
else:
self.scaler.fit(scale_in,y)
return self
It works when used on its own
scaler_transformer = ScalerOptFlatten( instance = StandardScaler(),
flatten = True )
scaler_transformer.fit(df_unstack)
But fails when stacked in
preprocessor = ColumnTransformer(
transformers=[
("ts", scaler_transformer, target_feature_names)
]
)
preprocessor.fit(df_unstack)
With
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
826 try:
--> 827 tasks = self._ready_batches.get(block=False)
828 except queue.Empty:
~/anaconda3/envs/tf_2_2_0/lib/python3.8/queue.py in get(self, block, timeout)
166 if not self._qsize():
--> 167 raise Empty
168 elif timeout is None:
Empty:
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-494-0eb4b591d8a6> in <module>
----> 1 ts_scaler_step.fit(df_unstack)
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in fit(self, X, y)
492 # we use fit_transform to make sure to set sparse_output_ (for which we
493 # need the transformed data) to have consistent output type in predict
--> 494 self.fit_transform(X, y=y)
495 return self
496
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
529 self._validate_remainder(X)
530
--> 531 result = self._fit_transform(X, y, _fit_transform_one)
532
533 if not result:
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
456 self._iter(fitted=fitted, replace_strings=True))
457 try:
--> 458 return Parallel(n_jobs=self.n_jobs)(
459 delayed(func)(
460 transformer=clone(trans) if not fitted else trans,
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/joblib/parallel.py in __call__(self, iterable)
1046 # remaining jobs.
1047 self._iterating = False
-> 1048 if self.dispatch_one_batch(iterator):
1049 self._iterating = self._original_iterator is not None
1050
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
836 big_batch_size = batch_size * n_jobs
837
--> 838 islice = list(itertools.islice(iterator, big_batch_size))
839 if len(islice) == 0:
840 return False
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in <genexpr>(.0)
458 return Parallel(n_jobs=self.n_jobs)(
459 delayed(func)(
--> 460 transformer=clone(trans) if not fitted else trans,
461 X=_safe_indexing(X, column, axis=1),
462 y=y,
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/base.py in clone(estimator, safe)
86 for name, param in new_object_params.items():
87 new_object_params[name] = clone(param, safe=False)
---> 88 new_object = klass(**new_object_params)
89 params_set = new_object.get_params(deep=False)
90
TypeError: __init__() missing 2 required positional arguments: 'instance' and 'flatten'
I suspect that error is in get_param method

Scipy "masked arrays are not supported" error

I am trying to calibrate a model using pykalman and the scipy optimiser. For some reasons scipy seem to think that my input is a masked array, but it is not. I have added the code below:
k = 0.00000000000001 #small sarting value
T = np.array([1, 2, 3, 4, 5] , dtype=int) #maturities
delta = 1e-9
x = k
transition_covariance=delta / (1 - delta) * np.eye(1)
def obj(x):
k = x[0]
Q = (1 - k) *np.eye(1)
Z = np.exp(-k*T)
Z =Z[:, None] #reshape to 2 dim
transition_covariance=delta / (1 - delta) * np.eye(1)
observation_covariance=0.001*np.eye(len(T))
kf = KalmanFilter(transition_matrices=np.eye(1)*(1-k),
observation_matrices= Z,
transition_covariance=transition_covariance,
observation_covariance=observation_covariance)
lk = kf.loglikelihood(X) # X is my observed data, 1265 rows × 5 columns
return -lk
x = np.array([k])
k =scipy.optimize.minimize(obj, x, method= 'TNC')
Running the scipy optimiser I get the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-16-8d87c9f2ced0> in <module>
1 # method that works: TNC, Nelder-Mead
----> 2 k =scipy.optimize.minimize(obj, x, method= 'TNC')
~\Anaconda3\lib\site-packages\scipy\optimize\_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
618 callback=callback, **options)
619 elif meth == 'tnc':
--> 620 return _minimize_tnc(fun, x0, args, jac, bounds, callback=callback,
621 **options)
622 elif meth == 'cobyla':
~\Anaconda3\lib\site-packages\scipy\optimize\tnc.py in _minimize_tnc(fun, x0, args, jac, bounds, eps, scale, offset, mesg_num, maxCGit, maxiter, eta, stepmx, accuracy, minfev, ftol, xtol, gtol, rescale, disp, callback, finite_diff_rel_step, maxfun, **unknown_options)
373 messages = MSG_NONE
374
--> 375 sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
376 finite_diff_rel_step=finite_diff_rel_step,
377 bounds=new_bounds)
~\Anaconda3\lib\site-packages\scipy\optimize\optimize.py in _prepare_scalar_function(fun, x0, jac, args, bounds, epsilon, finite_diff_rel_step, hess)
259 # ScalarFunction caches. Reuse of fun(x) during grad
260 # calculation reduces overall function evaluations.
--> 261 sf = ScalarFunction(fun, x0, args, grad, hess,
262 finite_diff_rel_step, bounds, epsilon=epsilon)
263
~\Anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, finite_diff_bounds, epsilon)
74
75 self._update_fun_impl = update_fun
---> 76 self._update_fun()
77
78 # Gradient evaluation
~\Anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in _update_fun(self)
164 def _update_fun(self):
165 if not self.f_updated:
--> 166 self._update_fun_impl()
167 self.f_updated = True
168
~\Anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in update_fun()
71
72 def update_fun():
---> 73 self.f = fun_wrapped(self.x)
74
75 self._update_fun_impl = update_fun
~\Anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in fun_wrapped(x)
68 def fun_wrapped(x):
69 self.nfev += 1
---> 70 return fun(x, *args)
71
72 def update_fun():
<ipython-input-13-48731c614f15> in obj(x)
19 observation_covariance=observation_covariance)
20
---> 21 lk = kf.loglikelihood(X) # X is my observed data, 1016 rows × 17 columns
22 return -lk
~\Anaconda3\lib\site-packages\pykalman\standard.py in loglikelihood(self, X)
1470
1471 # get likelihoods for each time step
-> 1472 loglikelihoods = _loglikelihoods(
1473 observation_matrices, observation_offsets, observation_covariance,
1474 predicted_state_means, predicted_state_covariances, Z
~\Anaconda3\lib\site-packages\pykalman\standard.py in _loglikelihoods(observation_matrices, observation_offsets, observation_covariance, predicted_state_means, predicted_state_covariances, observations)
165 + observation_covariance
166 )
--> 167 loglikelihoods[t] = log_multivariate_normal_density(
168 observation[np.newaxis, :],
169 predicted_observation_mean[np.newaxis, :],
~\Anaconda3\lib\site-packages\pykalman\utils.py in log_multivariate_normal_density(X, means, covars, min_covar)
71 lower=True)
72 cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
---> 73 cv_sol = solve_triangular(cv_chol, (X - mu).T, lower=True).T
74 log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) + \
75 n_dim * np.log(2 * np.pi) + cv_log_det)
~\Anaconda3\lib\site-packages\scipy\linalg\basic.py in solve_triangular(a, b, trans, lower, unit_diagonal, overwrite_b, debug, check_finite)
332
333 a1 = _asarray_validated(a, check_finite=check_finite)
--> 334 b1 = _asarray_validated(b, check_finite=check_finite)
335 if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
336 raise ValueError('expected square matrix')
~\Anaconda3\lib\site-packages\scipy\_lib\_util.py in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
259 if not mask_ok:
260 if np.ma.isMaskedArray(a):
--> 261 raise ValueError('masked arrays are not supported')
262 toarray = np.asarray_chkfinite if check_finite else np.asarray
263 a = toarray(a)
ValueError: masked arrays are not supported
Maybe something to do with an update in the scipy library? It worked with a different setup, so that's all I can think. My versions are:
Python 3.8.5
Scipy 1.5.2
Pykalman 0.9.5
Thanks!

I found the solution, which involves a small change in the utils.py file in the pykalman library (line 73):
try:
cv_sol = solve_triangular(cv_chol, (X - mu).T, lower=True).T
except ValueError:
cv_sol = np.linalg.solve(cv_chol, (X - mu).T).T
Source:
https://github.com/pykalman/pykalman/issues/83

category_encoders: TargetEncoder error "TypeError: Categorical cannot perform the operation mean"

I'm getting the below error when I try to target encode a categorical column.
"TypeError: Categorical cannot perform the operation mean"
When I try to run similar code through Jupyter Notebook it works fine but when I try to run it as part of python file it errors out with the above error message.
I know this sounds a bit crazy but I am not able to understand what's going on in the background?
Error:
TypeError Traceback (most recent call last)
<ipython-input-4-9ba53c6b7375> in <module>()
----> 1 tpmodeller.initialize()
~/SageMaker/tp/tp_kvr.py in initialize(self)
127
128 # Target Encode Te_cat_col Features
--> 129 df_cat_te = target_encode_bin(self.train[self.Te_cat_col], self.train['vol_trmnt_in_4_quarters'])
130 self.train = pd.concat([self.train, df_cat_te], axis=1)
131
~/SageMaker/tp/tp_kvr.py in target_encode_bin(df_te, target)
366 te = TargetEncoder(smoothing = 1, min_samples_leaf = 5, handle_unknown='ignore')
--> 367 df_te = te.fit_transform(df_te, target)
368 #
369 # Binning and then placing it in {col}_bin feature
~/anaconda3/envs/python3/lib/python3.6/site-packages/category_encoders/target_encoder.py in fit_transform(self, X, y, **fit_params)
249 transform(X)
250
--> 251 return self.fit(X, y, **fit_params).transform(X, y)
252
~/anaconda3/envs/python3/lib/python3.6/site-packages/category_encoders/target_encoder.py in fit(self, X, y, **kwargs)
138 self.ordinal_encoder = self.ordinal_encoder.fit(X)
139 X_ordinal = self.ordinal_encoder.transform(X)
--> 140 self.mapping = self.fit_target_encoding(X_ordinal, y)
141
142 X_temp = self.transform(X, override_return_df=True)
~/anaconda3/envs/python3/lib/python3.6/site-packages/category_encoders/target_encoder.py in fit_target_encoding(self, X, y)
164 values = switch.get('mapping')
165
--> 166 prior = self._mean = y.mean()
167
168 stats = y.groupby(X[col]).agg(['count', 'mean'])
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/generic.py in stat_func(self, axis, skipna, level, numeric_only, **kwargs)
10954 skipna=skipna)
10955 return self._reduce(f, name, axis=axis, skipna=skipna,
> 10956 numeric_only=numeric_only)
10957
10958 return set_function_name(stat_func, name, cls)
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/series.py in _reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
3614 # TODO deprecate numeric_only argument for Categorical and use
3615 # skipna as well, see GH25303
-> 3616 return delegate._reduce(name, numeric_only=numeric_only, **kwds)
3617 elif isinstance(delegate, ExtensionArray):
3618 # dispatch to ExtensionArray interface
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/arrays/categorical.py in _reduce(self, name, axis, **kwargs)
2170 if func is None:
2171 msg = 'Categorical cannot perform the operation {op}'
-> 2172 raise TypeError(msg.format(op=name))
2173 return func(**kwargs)
2174
TypeError: Categorical cannot perform the operation mean

Your target feature needs to be converted to a 'number' type.

Stats Models Logit().fit() function throwing an error LinAlgError: Singular matrix

I am trying to to create run a logit model on a dataset where mpg_high is the outcome variable based on the other data frame columns.
When I run the following code I do not get any errors:
exog = ['constant','cylinders','displacement','horsepower','weight','year', 'origin']
logit = sm.Logit(endog = df_quant2['mpg_high'], exog = df_quant2[['constant','cylinders','displacement','horsepower','weight','year', 'origin']])
but when i try to use
logit.fit()
I get the following error:
Warning: Maximum number of iterations has been exceeded.
Current function value: inf
Iterations: 35
/Users/*/anaconda/lib/python3.6/site-packages/statsmodels/discrete/discrete_model.py:1214: RuntimeWarning: overflow encountered in exp
return 1/(1+np.exp(-X))
/Users/*/anaconda/lib/python3.6/site-packages/statsmodels/discrete/discrete_model.py:1264: RuntimeWarning: divide by zero encountered in log
return np.sum(np.log(self.cdf(q*np.dot(X,params))))
---------------------------------------------------------------------------
LinAlgError Traceback (most recent call last)
<ipython-input-112-f8dd482d7884> in <module>()
----> 1 logit.fit()
/Users/*/anaconda/lib/python3.6/site-packages/statsmodels/discrete/discrete_model.py in fit(self, start_params, method, maxiter, full_output, disp, callback, **kwargs)
1375 bnryfit = super(Logit, self).fit(start_params=start_params,
1376 method=method, maxiter=maxiter, full_output=full_output,
-> 1377 disp=disp, callback=callback, **kwargs)
1378
1379 discretefit = LogitResults(self, bnryfit)
/Users/*/anaconda/lib/python3.6/site-packages/statsmodels/discrete/discrete_model.py in fit(self, start_params, method, maxiter, full_output, disp, callback, **kwargs)
202 mlefit = super(DiscreteModel, self).fit(start_params=start_params,
203 method=method, maxiter=maxiter, full_output=full_output,
--> 204 disp=disp, callback=callback, **kwargs)
205
206 return mlefit # up to subclasses to wrap results
/Users/*/anaconda/lib/python3.6/site-packages/statsmodels/base/model.py in fit(self, start_params, method, maxiter, full_output, disp, fargs, callback, retall, skip_hessian, **kwargs)
456 Hinv = cov_params_func(self, xopt, retvals)
457 elif method == 'newton' and full_output:
--> 458 Hinv = np.linalg.inv(-retvals['Hessian']) / nobs
459 elif not skip_hessian:
460 H = -1 * self.hessian(xopt)
/Users/*/anaconda/lib/python3.6/site-packages/numpy/linalg/linalg.py in inv(a)
511 signature = 'D->D' if isComplexType(t) else 'd->d'
512 extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
--> 513 ainv = _umath_linalg.inv(a, signature=signature, extobj=extobj)
514 return wrap(ainv.astype(result_t, copy=False))
515
/Users/*/anaconda/lib/python3.6/site-packages/numpy/linalg/linalg.py in _raise_linalgerror_singular(err, flag)
88
89 def _raise_linalgerror_singular(err, flag):
---> 90 raise LinAlgError("Singular matrix")
91
92 def _raise_linalgerror_nonposdef(err, flag):
LinAlgError: Singular matrix
All the values in the mpg_high values are either 0 or 1
Not sure what I am missing here, appreciate any help!
Thanks!

UPDATE
I was able to get some of the model working by excluding the Horsepower variable from the endog arguments. It may have been due to the data type. I have sinced converted it to a float64 but the model still will not run with the now changed column data type

pymc3: Disaster example with deterministic switchpoint function

I'm trying to reproduce coal mining example with deterministic function for switchpoint instead of using theano's switch function. Code:
%matplotlib inline
import matplotlib.pyplot as plt
import pymc3
import numpy as np
import theano.tensor as t
import theano
data = np.hstack((np.random.poisson(15,1000),np.random.poisson(2,100)))
plt.plot(data)
#theano.compile.ops.as_op(itypes=[t.lscalar, t.dscalar,t.dscalar],otypes=[t.dvector])
def rate1(sw,mu1,mu2):
n = len(data)
out = np.empty(n)
out[:sw] = mu1
out[sw:] = mu2
return out
with pymc3.Model() as dis:
switchpoint = pymc3.DiscreteUniform('switchpoint',lower=0, upper=len(data)-1)
mu1 = pymc3.Exponential('mu1', lam=1.)
mu2 = pymc3.Exponential('mu2',lam=1.)
disasters=pymc3.Poisson('disasters', mu=rate1, observed = data)
But this code rise an error:
--------------------------------------------------------------------------- KeyError Traceback (most recent call
last) c:\program files\git\theano\theano\tensor\type.py in
dtype_specs(self)
266 'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64')
--> 267 }[self.dtype]
268 except KeyError:
KeyError: 'object'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call
last) c:\program files\git\theano\theano\tensor\basic.py in
constant_or_value(x, rtype, name, ndim, dtype)
407 rval = rtype(
--> 408 TensorType(dtype=x_.dtype, broadcastable=bcastable),
409 x_.copy(),
c:\program files\git\theano\theano\tensor\type.py in init(self,
dtype, broadcastable, name, sparse_grad)
49 self.broadcastable = tuple(bool(b) for b in broadcastable)
---> 50 self.dtype_specs() # error checking is done there
51 self.name = name
c:\program files\git\theano\theano\tensor\type.py in dtype_specs(self)
269 raise TypeError("Unsupported dtype for %s: %s"
--> 270 % (self.class.name, self.dtype))
271
TypeError: Unsupported dtype for TensorType: object
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call
last) c:\program files\git\theano\theano\tensor\basic.py in
as_tensor_variable(x, name, ndim)
201 try:
--> 202 return constant(x, name=name, ndim=ndim)
203 except TypeError:
c:\program files\git\theano\theano\tensor\basic.py in constant(x,
name, ndim, dtype)
421 ret = constant_or_value(x, rtype=TensorConstant, name=name, ndim=ndim,
--> 422 dtype=dtype)
423
c:\program files\git\theano\theano\tensor\basic.py in
constant_or_value(x, rtype, name, ndim, dtype)
416 except Exception:
--> 417 raise TypeError("Could not convert %s to TensorType" % x, type(x))
418
TypeError: ('Could not convert FromFunctionOp{rate1} to TensorType',
)
During handling of the above exception, another exception occurred:
AsTensorError Traceback (most recent call
last) in ()
14 mu2 = pymc3.Exponential('mu2',lam=1.)
15 #rate1 = pymc3.switch(switchpoint >= np.arange(len(data)), mu1,mu2)
---> 16 disasters=pymc3.Poisson('disasters', mu=rate1, observed = data)
C:\Users\User\Anaconda3\lib\site-packages\pymc3\distributions\distribution.py
in new(cls, name, *args, **kwargs)
19 if isinstance(name, str):
20 data = kwargs.pop('observed', None)
---> 21 dist = cls.dist(*args, **kwargs)
22 return model.Var(name, dist, data)
23 elif name is None:
C:\Users\User\Anaconda3\lib\site-packages\pymc3\distributions\distribution.py
in dist(cls, *args, **kwargs)
32 def dist(cls, *args, **kwargs):
33 dist = object.new(cls)
---> 34 dist.init(*args, **kwargs)
35 return dist
36
C:\Users\User\Anaconda3\lib\site-packages\pymc3\distributions\discrete.py
in init(self, mu, *args, **kwargs)
185 super(Poisson, self).init(*args, **kwargs)
186 self.mu = mu
--> 187 self.mode = floor(mu).astype('int32')
188
189 def random(self, point=None, size=None, repeat=None):
c:\program files\git\theano\theano\gof\op.py in call(self,
*inputs, **kwargs)
598 """
599 return_list = kwargs.pop('return_list', False)
--> 600 node = self.make_node(*inputs, **kwargs)
601
602 if config.compute_test_value != 'off':
c:\program files\git\theano\theano\tensor\elemwise.py in
make_node(self, *inputs)
540 using DimShuffle.
541 """
--> 542 inputs = list(map(as_tensor_variable, inputs))
543 shadow = self.scalar_op.make_node(
544 *[get_scalar_type(dtype=i.type.dtype).make_variable()
c:\program files\git\theano\theano\tensor\basic.py in
as_tensor_variable(x, name, ndim)
206 except Exception:
207 str_x = repr(x)
--> 208 raise AsTensorError("Cannot convert %s to TensorType" % str_x, type(x))
209
210 # this has a different name, because _as_tensor_variable is the
AsTensorError: ('Cannot convert FromFunctionOp{rate1} to TensorType',
)
How i handle this?
The second thing - when i'm using the pymc3.switch function like this:
with pymc3.Model() as dis:
switchpoint = pymc3.DiscreteUniform('switchpoint',lower=0, upper=len(data)-1)
mu1 = pymc3.Exponential('mu1', lam=1.)
mu2 = pymc3.Exponential('mu2',lam=1.)
rate1 = pymc3.switch(switchpoint >= np.arange(len(data)), mu1,mu2)
disasters=pymc3.Poisson('disasters', mu=rate1, observed = data)
And next try to sample:
with dis:
step1 = pymc3.NUTS([mu1, mu2])
step2 = pymc3.Metropolis([switchpoint])
trace = pymc3.sample(10000, step = [step1,step2])
I get an error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
c:\program files\git\theano\theano\compile\function_module.py in __call__(self, *args, **kwargs)
858 try:
--> 859 outputs = self.fn()
860 except Exception:
TypeError: expected type_num 9 (NPY_INT64) got 7
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-4-3247d908f897> in <module>()
2 step1 = pymc3.NUTS([mu1, mu2])
3 step2 = pymc3.Metropolis([switchpoint])
----> 4 trace = pymc3.sample(10000, step = [step1,step2])
C:\Users\User\Anaconda3\lib\site-packages\pymc3\sampling.py in sample(draws, step, start, trace, chain, njobs, tune, progressbar, model, random_seed)
153 sample_args = [draws, step, start, trace, chain,
154 tune, progressbar, model, random_seed]
--> 155 return sample_func(*sample_args)
156
157
C:\Users\User\Anaconda3\lib\site-packages\pymc3\sampling.py in _sample(draws, step, start, trace, chain, tune, progressbar, model, random_seed)
162 progress = progress_bar(draws)
163 try:
--> 164 for i, strace in enumerate(sampling):
165 if progressbar:
166 progress.update(i)
C:\Users\User\Anaconda3\lib\site-packages\pymc3\sampling.py in _iter_sample(draws, step, start, trace, chain, tune, model, random_seed)
244 if i == tune:
245 step = stop_tuning(step)
--> 246 point = step.step(point)
247 strace.record(point)
248 yield strace
C:\Users\User\Anaconda3\lib\site-packages\pymc3\step_methods\compound.py in step(self, point)
11 def step(self, point):
12 for method in self.methods:
---> 13 point = method.step(point)
14 return point
C:\Users\User\Anaconda3\lib\site-packages\pymc3\step_methods\arraystep.py in step(self, point)
116 bij = DictToArrayBijection(self.ordering, point)
117
--> 118 apoint = self.astep(bij.map(point))
119 return bij.rmap(apoint)
120
C:\Users\User\Anaconda3\lib\site-packages\pymc3\step_methods\metropolis.py in astep(self, q0)
123
124
--> 125 q_new = metrop_select(self.delta_logp(q,q0), q, q0)
126
127 if q_new is q:
c:\program files\git\theano\theano\compile\function_module.py in __call__(self, *args, **kwargs)
869 node=self.fn.nodes[self.fn.position_of_error],
870 thunk=thunk,
--> 871 storage_map=getattr(self.fn, 'storage_map', None))
872 else:
873 # old-style linkers raise their own exceptions
c:\program files\git\theano\theano\gof\link.py in raise_with_op(node, thunk, exc_info, storage_map)
312 # extra long error message in that case.
313 pass
--> 314 reraise(exc_type, exc_value, exc_trace)
315
316
C:\Users\User\Anaconda3\lib\site-packages\six.py in reraise(tp, value, tb)
656 value = tp()
657 if value.__traceback__ is not tb:
--> 658 raise value.with_traceback(tb)
659 raise value
660
c:\program files\git\theano\theano\compile\function_module.py in __call__(self, *args, **kwargs)
857 t0_fn = time.time()
858 try:
--> 859 outputs = self.fn()
860 except Exception:
861 if hasattr(self.fn, 'position_of_error'):
TypeError: expected type_num 9 (NPY_INT64) got 7
Apply node that caused the error: Elemwise{Composite{Switch(GE(i0, i1), i2, i3)}}(InplaceDimShuffle{x}.0, TensorConstant{[ 0 1..1098 1099]}, InplaceDimShuffle{x}.0, InplaceDimShuffle{x}.0)
Toposort index: 11
Inputs types: [TensorType(int64, (True,)), TensorType(int32, vector), TensorType(float64, (True,)), TensorType(float64, (True,))]
Inputs shapes: [(1,), (1100,), (1,), (1,)]
Inputs strides: [(4,), (4,), (8,), (8,)]
Inputs values: [array([549]), 'not shown', array([ 1.07762995]), array([ 1.01502801])]
Outputs clients: [[Elemwise{eq,no_inplace}(Elemwise{Composite{Switch(GE(i0, i1), i2, i3)}}.0, TensorConstant{(1,) of 0}), Elemwise{Composite{Switch(GE(i0, i1), ((Switch(i2, i3, (i4 * log(i0))) - i5) - i0), i3)}}[(0, 0)](Elemwise{Composite{Switch(GE(i0, i1), i2, i3)}}.0, TensorConstant{(1,) of 0}, InplaceDimShuffle{x}.0, TensorConstant{(1,) of -inf}, TensorConstant{[ 13. 13... 0. 1.]}, TensorConstant{[ 22.55216... ]})]]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
Being simple analyst, should i learn all this stuff about theano to being able to work with my statistical problems? Is a new mcmc sampler with gradient feature is only one thing that should motivates me to switch from pymc2 to pymc3?

For your first question, it looks like you're trying to pass a theano function as a variable. You need to call the function with the other variables as arguments, which will then return a theano variable. Try changing your line to
disasters=pymc3.Poisson('disasters', mu=rate1(switchpoint, mu1, mu2), observed = data)
I couldn't reproduce the error in your second part; the sampling worked just fine for me.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Specifying linear regression in python - python

Related

Custom scaler transformer fit error only when used in ColumnTransformer pipeline

Scipy "masked arrays are not supported" error

category_encoders: TargetEncoder error "TypeError: Categorical cannot perform the operation mean"

Stats Models Logit().fit() function throwing an error LinAlgError: Singular matrix

pymc3: Disaster example with deterministic switchpoint function

Categories

Resources