Related
I have trouble using OneVsRestClassifier and cross-validation from sklearn.
train, test = train_test_split(tickets, random_state=42, test_size=0.30, shuffle=True)
X_train = train[['TK_Poids_brut', 'TK_Poids_tare']]
y_train = train['TK_Qualite']
X_test = test[['TK_Poids_brut', 'TK_Poids_tare']]
y_test = test['TK_Qualite']
le = preprocessing.LabelEncoder()
y_train_tra = le.fit_transform(y_train)
printDataInfo(X_train,y_train_tra)
#The printDataInfo function is there just to display information about X and y
clf_OvR_SVC = OneVsRestClassifier(LinearSVC(random_state=0))
cross_v = cross_validate(clf_OvR_SVC, X_train, y_train_tra, error_score="raise",scoring=dict(ac=make_scorer(accuracy_score), roc=make_scorer(roc_auc_score, multi_class="ovr")), cv=5)
cross_v
When I do this I get the following error:
---------------------------------------------------------------------------
AxisError Traceback (most recent call last)
C:\TEMP/ipykernel_20332/2926737612.py in <module>
23
24 clf_OvR_SVC = OneVsRestClassifier(LinearSVC(random_state=0))
---> 25 cross_v = cross_validate(clf_OvR_SVC, X_train, y_train_tra ,error_score="raise",scoring=dict(ac=make_scorer(accuracy_score), roc=make_scorer(roc_auc_score, multi_class="ovr")), cv=5)
26 cross_v
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
248 parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
249 pre_dispatch=pre_dispatch)
--> 250 results = parallel(
251 delayed(_fit_and_score)(
252 clone(estimator), X, y, scorers, train, test, verbose, None,
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1041 # remaining jobs.
1042 self._iterating = False
-> 1043 if self.dispatch_one_batch(iterator):
1044 self._iterating = self._original_iterator is not None
1045
~\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
859 return False
860 else:
--> 861 self._dispatch(tasks)
862 return True
863
~\Anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
777 with self._lock:
778 job_idx = len(self._jobs)
--> 779 job = self._backend.apply_async(batch, callback=cb)
780 # A job can complete so quickly than its callback is
781 # called before we get here, causing self._jobs to
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~\Anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~\Anaconda3\lib\site-packages\sklearn\utils\fixes.py in __call__(self, *args, **kwargs)
220 def __call__(self, *args, **kwargs):
221 with config_context(**self.config):
--> 222 return self.function(*args, **kwargs)
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)
623
624 fit_time = time.time() - start_time
--> 625 test_scores = _score(estimator, X_test, y_test, scorer, error_score)
626 score_time = time.time() - start_time - fit_time
627 if return_train_score:
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _score(estimator, X_test, y_test, scorer, error_score)
685 scores = scorer(estimator, X_test)
686 else:
--> 687 scores = scorer(estimator, X_test, y_test)
688 except Exception:
689 if error_score == 'raise':
~\Anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in __call__(self, estimator, *args, **kwargs)
85 for name, scorer in self._scorers.items():
86 if isinstance(scorer, _BaseScorer):
---> 87 score = scorer._score(cached_call, estimator,
88 *args, **kwargs)
89 else:
~\Anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in _score(self, method_caller, estimator, X, y_true, sample_weight)
240 **self._kwargs)
241 else:
--> 242 return self._sign * self._score_func(y_true, y_pred,
243 **self._kwargs)
244
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~\Anaconda3\lib\site-packages\sklearn\metrics\_ranking.py in roc_auc_score(y_true, y_score, average, sample_weight, max_fpr, multi_class, labels)
535 if multi_class == 'raise':
536 raise ValueError("multi_class must be in ('ovo', 'ovr')")
--> 537 return _multiclass_roc_auc_score(y_true, y_score, labels,
538 multi_class, average, sample_weight)
539 elif y_type == "binary":
~\Anaconda3\lib\site-packages\sklearn\metrics\_ranking.py in _multiclass_roc_auc_score(y_true, y_score, labels, multi_class, average, sample_weight)
593 """
594 # validation of the input y_score
--> 595 if not np.allclose(1, y_score.sum(axis=1)):
596 raise ValueError(
597 "Target scores need to be probabilities for multiclass "
~\Anaconda3\lib\site-packages\numpy\core\_methods.py in _sum(a, axis, dtype, out, keepdims, initial, where)
45 def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
46 initial=_NoValue, where=True):
---> 47 return umr_sum(a, axis, dtype, out, keepdims, initial, where)
48
49 def _prod(a, axis=None, dtype=None, out=None, keepdims=False,
AxisError: axis 1 is out of bounds for array of dimension 1
I already tried converting both to numpy arrays, and I tried reshaping y to (6108, 1), but I always get the same error. Here is the input data format:
type :
x: <class 'pandas.core.frame.DataFrame'>
y: <class 'numpy.ndarray'>
shape :
X: (6108, 2)
y: (6108,)
data :
x: TK_Poids_brut TK_Poids_tare
8436 14420 14160
7014 17160 12320
3931 28060 15040
6749 16680 14360
2984 10060 9100
... ... ...
5734 19700 15420
5191 25380 14620
5390 19460 14760
860 16160 14100
7270 15520 14500
[6108 rows x 2 columns]
y: [132 85 160 118 118 40 88 126 12 40 41 138 5 125 125 147 111 118
153 40 118 126 118 125 123 62 177 45 118 105 3 1 105 142 116 100
118 125 118 78 124 3 126 53 138 118 40 118 53 124 126 98 118 155
118 131 5 135 130 3 118 105 118 126 105 87 118 118 24 124 130 130
...
118 124 118 180 118 58 124 126 153 126 124 118 125 153 86 94 126 118
130 105 42 62 124 78]
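A likely cause, for what it's worth: roc_auc_score with multi_class="ovr" expects per-class scores of shape (n_samples, n_classes), but a plain make_scorer(roc_auc_score, ...) passes the 1-D output of predict(), hence the AxisError on axis 1, and LinearSVC has no predict_proba to draw on. Below is a minimal sketch of one possible fix, not the only one: wrapping LinearSVC in CalibratedClassifierCV (my assumption, just to obtain probabilities) and telling the scorer to use them. With this many classes you may also need stratified folds or the labels argument so every class is represented in each fold.
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import accuracy_score, make_scorer, roc_auc_score
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC

# CalibratedClassifierCV adds predict_proba on top of LinearSVC;
# needs_proba=True makes the scorer forward probabilities, not labels
clf = OneVsRestClassifier(CalibratedClassifierCV(LinearSVC(random_state=0)))
scoring = dict(ac=make_scorer(accuracy_score),
               roc=make_scorer(roc_auc_score, multi_class="ovr", needs_proba=True))
cross_v = cross_validate(clf, X_train, y_train_tra, scoring=scoring,
                         cv=5, error_score="raise")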
I am trying to visualize 5-D mixed data as a bubble chart, leveraging hue, size, and depth.
I used the following code:
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
t = fig.suptitle('Wine Residual Sugar - Alcohol Content - Acidity - Total Sulfur Dioxide - Type', fontsize=14)
xs = list(iris_data['SepalLengthCm'])
ys = list(iris_data['SepalWidthCm'])
zs = list(iris_data['PetalLengthCm'])
data_points = [(x, y, z) for x, y, z in zip(xs, ys, zs)]
ss = list(iris_data['PetalWidthCm'])
colors = ['red' if wt == 'red' else 'yellow' for wt in list(iris_data['Species'])]
for data, color, size in zip(data_points, colors, ss):
    x, y, z = data
    ax.scatter(x, y, z, alpha=0.4, c=color, edgecolors='none', s=size)
ax.set_xlabel('Sepal Length')
ax.set_ylabel('Sepal Width')
ax.set_zlabel('Petal Length')
But it is giving the following error:
operands could not be broadcast together with remapped shapes [original->remapped]: (0,4) and requested shape (1,4)
<Figure size 576x432 with 1 Axes>
I am adding the full trace of the error I am getting:
Text(0.5, 0, 'Petal Length')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\anaconda\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
339 pass
340 else:
--> 341 return printer(obj)
342 # Finally look for special method names
343 method = get_real_method(obj, self.print_method)
~\anaconda\lib\site-packages\IPython\core\pylabtools.py in <lambda>(fig)
246
247 if 'png' in formats:
--> 248 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
249 if 'retina' in formats or 'png2x' in formats:
250 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
~\anaconda\lib\site-packages\IPython\core\pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
130 FigureCanvasBase(fig)
131
--> 132 fig.canvas.print_figure(bytes_io, **kw)
133 data = bytes_io.getvalue()
134 if fmt == 'svg':
~\anaconda\lib\site-packages\matplotlib\backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
2191 else suppress())
2192 with ctx:
-> 2193 self.figure.draw(renderer)
2194
2195 bbox_inches = self.figure.get_tightbbox(
~\anaconda\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
39 renderer.start_filter()
40
---> 41 return draw(artist, renderer, *args, **kwargs)
42 finally:
43 if artist.get_agg_filter() is not None:
~\anaconda\lib\site-packages\matplotlib\figure.py in draw(self, renderer)
1861
1862 self.patch.draw(renderer)
-> 1863 mimage._draw_list_compositing_images(
1864 renderer, self, artists, self.suppressComposite)
1865
~\anaconda\lib\site-packages\matplotlib\image.py in _draw_list_compositing_images(renderer, parent, artists, suppress_composite)
129 if not_composite or not has_images:
130 for a in artists:
--> 131 a.draw(renderer)
132 else:
133 # Composite any adjacent images together
~\anaconda\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
39 renderer.start_filter()
40
---> 41 return draw(artist, renderer, *args, **kwargs)
42 finally:
43 if artist.get_agg_filter() is not None:
~\anaconda\lib\site-packages\mpl_toolkits\mplot3d\axes3d.py in draw(self, renderer)
443 for axis in self._get_axis_list()) + 1
444 for i, col in enumerate(
--> 445 sorted(self.collections,
446 key=lambda col: col.do_3d_projection(renderer),
447 reverse=True)):
~\anaconda\lib\site-packages\mpl_toolkits\mplot3d\axes3d.py in <lambda>(col)
444 for i, col in enumerate(
445 sorted(self.collections,
--> 446 key=lambda col: col.do_3d_projection(renderer),
447 reverse=True)):
448 col.zorder = zorder_offset + i
~\anaconda\lib\site-packages\mpl_toolkits\mplot3d\art3d.py in do_3d_projection(self, renderer)
492 fcs = (_zalpha(self._facecolor3d, vzs) if self._depthshade else
493 self._facecolor3d)
--> 494 ecs = (_zalpha(self._edgecolor3d, vzs) if self._depthshade else
495 self._edgecolor3d)
496 sizes = self._sizes3d
~\anaconda\lib\site-packages\mpl_toolkits\mplot3d\art3d.py in _zalpha(colors, zs)
808 norm = Normalize(min(zs), max(zs))
809 sats = 1 - norm(zs) * 0.7
--> 810 rgba = np.broadcast_to(mcolors.to_rgba_array(colors), (len(zs), 4))
811 return np.column_stack([rgba[:, :3], rgba[:, 3] * sats])
<__array_function__ internals> in broadcast_to(*args, **kwargs)
~\anaconda\lib\site-packages\numpy\lib\stride_tricks.py in broadcast_to(array, shape, subok)
178 [1, 2, 3]])
179 """
--> 180 return _broadcast_to(array, shape, subok=subok, readonly=True)
181
182
~\anaconda\lib\site-packages\numpy\lib\stride_tricks.py in _broadcast_to(array, shape, subok, readonly)
121 'negative')
122 extras = []
--> 123 it = np.nditer(
124 (array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'] + extras,
125 op_flags=['readonly'], itershape=shape, order='C')
ValueError: operands could not be broadcast together with remapped shapes [original->remapped]: (0,4) and requested shape (1,4)
<Figure size 576x432 with 1 Axes>
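For what it's worth, the broadcast error comes from the depth-shading step trying to re-colour an empty edge-colour array (edgecolors='none') for each single-point collection. A minimal sketch of a workaround, assuming the same iris_data columns as above: pass the whole arrays to one scatter call and turn depth shading off.
from mpl_toolkits.mplot3d import Axes3D  # registers the 3d projection on older matplotlib
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
# one call with array arguments instead of a per-point loop;
# the sizes may need scaling (PetalWidthCm is only 0.1-2.5) to be visible
ax.scatter(xs, ys, zs, s=[v * 50 for v in ss], c=colors, alpha=0.4,
           edgecolors='face', depthshade=False)
ax.set_xlabel('Sepal Length')
ax.set_ylabel('Sepal Width')
ax.set_zlabel('Petal Length')
plt.show()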
This is my code/model that I'm trying to implement:
kf = KFold(n_splits=10,shuffle=True,random_state=2652124)
transf = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}',max_features=1500, min_df=5, max_df=0.7, stop_words=stop)
scaler = MinMaxScaler(feature_range=(0, 1))
metadata = ['F13','F14','F19','F21','F22']
cls = RandomForestClassifier(n_estimators=1000, random_state=0)
features = make_column_transformer(
    (transf, 'textimage'), (transf, 'subtitle'),
    (scaler, metadata), (scaler, 'F3'), remainder='drop')
X = features.fit_transform(data)
y = data['classification']
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    cls.fit(X_train, y_train)
    y_score = cls.fit(X_train, y_train).predict_proba(X_test)
    roc = roc_auc_score(y_test, y_score[:, 1])
    pred = cls.predict(X_test)
    acs = accuracy_score(y_test, pred)
    clr = classification_report(y_test, pred)
The error:
TypeError Traceback (most recent call last)
TypeError: only size-1 arrays can be converted to Python scalars
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-13-6bdcb91ff478> in <module>
14
15 kfnum = 1
---> 16 X = features.fit_transform(data)
17 y = data['classe']
18 catr = 'timagem + metadados + legenda'
~/.local/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
529 self._validate_remainder(X)
530
--> 531 result = self._fit_transform(X, y, _fit_transform_one)
532
533 if not result:
~/.local/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
456 self._iter(fitted=fitted, replace_strings=True))
457 try:
--> 458 return Parallel(n_jobs=self.n_jobs)(
459 delayed(func)(
460 transformer=clone(trans) if not fitted else trans,
~/.local/lib/python3.8/site-packages/joblib/parallel.py in __call__(self, iterable)
1049 self._iterating = self._original_iterator is not None
1050
-> 1051 while self.dispatch_one_batch(iterator):
1052 pass
1053
~/.local/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
864 return False
865 else:
--> 866 self._dispatch(tasks)
867 return True
868
~/.local/lib/python3.8/site-packages/joblib/parallel.py in _dispatch(self, batch)
782 with self._lock:
783 job_idx = len(self._jobs)
--> 784 job = self._backend.apply_async(batch, callback=cb)
785 # A job can complete so quickly than its callback is
786 # called before we get here, causing self._jobs to
~/.local/lib/python3.8/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~/.local/lib/python3.8/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~/.local/lib/python3.8/site-packages/joblib/parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/.local/lib/python3.8/site-packages/joblib/parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/.local/lib/python3.8/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
738 with _print_elapsed_time(message_clsname, message):
739 if hasattr(transformer, 'fit_transform'):
--> 740 res = transformer.fit_transform(X, y, **fit_params)
741 else:
742 res = transformer.fit(X, y, **fit_params).transform(X)
~/.local/lib/python3.8/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
688 if y is None:
689 # fit method of arity 1 (unsupervised transformation)
--> 690 return self.fit(X, **fit_params).transform(X)
691 else:
692 # fit method of arity 2 (supervised transformation)
~/.local/lib/python3.8/site-packages/sklearn/preprocessing/_data.py in fit(self, X, y)
334 # Reset internal state before fitting
335 self._reset()
--> 336 return self.partial_fit(X, y)
337
338 def partial_fit(self, X, y=None):
~/.local/lib/python3.8/site-packages/sklearn/preprocessing/_data.py in partial_fit(self, X, y)
367
368 first_pass = not hasattr(self, 'n_samples_seen_')
--> 369 X = self._validate_data(X, reset=first_pass,
370 estimator=self, dtype=FLOAT_DTYPES,
371 force_all_finite="allow-nan")
~/.local/lib/python3.8/site-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
418 f"requires y to be passed, but the target y is None."
419 )
--> 420 X = check_array(X, **check_params)
421 out = X
422 else:
~/.local/lib/python3.8/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
~/.local/lib/python3.8/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
596 array = array.astype(dtype, casting="unsafe", copy=False)
597 else:
--> 598 array = np.asarray(array, order=order, dtype=dtype)
599 except ComplexWarning:
600 raise ValueError("Complex data not supported\n"
~/.local/lib/python3.8/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
81
82 """
---> 83 return array(a, dtype, copy=False, order=order)
84
85
~/.local/lib/python3.8/site-packages/pandas/core/series.py in __array__(self, dtype)
795 dtype='datetime64[ns]')
796 """
--> 797 return np.asarray(self.array, dtype)
798
799 # ----------------------------------------------------------------------
~/.local/lib/python3.8/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
81
82 """
---> 83 return array(a, dtype, copy=False, order=order)
84
85
~/.local/lib/python3.8/site-packages/pandas/core/arrays/numpy_.py in __array__(self, dtype)
209
210 def __array__(self, dtype=None) -> np.ndarray:
--> 211 return np.asarray(self._ndarray, dtype=dtype)
212
213 _HANDLED_TYPES = (np.ndarray, numbers.Number)
~/.local/lib/python3.8/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
81
82 """
---> 83 return array(a, dtype, copy=False, order=order)
84
85
ValueError: setting an array element with a sequence.
I have no problems using only:
features = make_column_transformer(
    (transf, 'textimage'), (transf, 'subtitle'),
    (scaler, metadata), remainder='drop')
So my problem is the column 'F3' in my dataframe, which contains an array in each row:
0 [0.0026778684, 0.003117677, 0.00040434036, 0.0...
1 [0.061992627, 0.047432333, 0.012270351, 0.0102...
2 [0.0, 0.0, 0.0, 4.3830705e-06, 1.3149212e-05, ...
3 [0.30314153, 0.04477268, 0.01840577, 0.0319251...
4 [0.2563626, 0.03259786, 0.018686974, 0.0198365...
...
1287 [0.11471527, 0.032394826, 0.012400794, 0.01131...
1288 [0.002138354, 0.001044489, 0.0007786191, 0.001...
1289 [0.056204572, 0.026556363, 0.02082041, 0.01966...
1290 [0.051759016, 0.0058623934, 0.0054726205, 0.00...
1291 [0.0, 5.4140626e-05, 4.4114586e-05, 4.8125003e...
Name: F3, Length: 1292, dtype: object
Can anyone help me with that? How can I pass a column of arrays through the pipeline, or how can I concatenate the transformed features with such a column? Any suggestions?
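One possible approach, sketched below under the assumption that every row of 'F3' holds a numeric array of the same length: stack the per-row arrays into a 2-D matrix inside the ColumnTransformer via a FunctionTransformer, then scale the result.
import numpy as np
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer, MinMaxScaler

# with a single string selector the transformer receives a 1-D Series, so
# vstack turns the Series of equal-length arrays into (n_samples, n_dims)
stack_f3 = FunctionTransformer(lambda col: np.vstack(col.to_numpy()), validate=False)

features = make_column_transformer(
    (transf, 'textimage'), (transf, 'subtitle'),
    (scaler, metadata),
    (make_pipeline(stack_f3, MinMaxScaler(feature_range=(0, 1))), 'F3'),
    remainder='drop')

X = features.fit_transform(data)
Alternatively, np.vstack(data['F3']) can be computed once outside the pipeline and hstacked onto the other transformed features.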
I was doing some regression experiments using pandas and numpy. The package I am using (PySAL's GWR) only supports numpy arrays.
I have two kinds of data in the dataset, continuous and categorical.
As independent variables
pandas dataframe: re
What I do for the continuous ones:
built_year=re.built_year.values.reshape((-1,1))
Input: >>>built_year.shape
Output:(4508, 1)
What I do for the categorical ones:
condition=pd.factorize(re.condition.values)[0].reshape((-1,1))
Input: >>>condition.shape
Output:(4508, 1)
For the dependent variable
Y = re.price.values.reshape((-1,1))
Then I generate the 'equation':
X = np.hstack([condition, built_year])
and pass Y and X to the package's method as parameters.
Whenever I include only continuous variables, everything is fine.
As soon as I include categorical variables, it gives this error:
~/miniconda3/lib/python3.6/site-packages/PySAL-1.14.3-py3.6.egg/pysal/contrib/gwr/sel_bw.py in search(self, search, criterion, bw_min, bw_max, interval, tol, max_iter)
202 self.int_score = int_score
203
--> 204 self._bw()
205
206 return self.bw[0]
~/miniconda3/lib/python3.6/site-packages/PySAL-1.14.3-py3.6.egg/pysal/contrib/gwr/sel_bw.py in _bw(self)
215 delta = 0.38197 #1 - (np.sqrt(5.0)-1.0)/2.0
216 self.bw = golden_section(a, c, delta, gwr_func, self.tol,
--> 217 self.max_iter, self.int_score)
218 elif self.search == 'interval':
219 self.bw = equal_interval(self.bw_min, self.bw_max, self.interval,
~/miniconda3/lib/python3.6/site-packages/PySAL-1.14.3-py3.6.egg/pysal/contrib/gwr/search.py in golden_section(a, c, delta, function, tol, max_iter, int_score)
51 d = np.round(d)
52
---> 53 score_a = function(a)
54 score_b = function(b)
55 score_c = function(c)
~/miniconda3/lib/python3.6/site-packages/PySAL-1.14.3-py3.6.egg/pysal/contrib/gwr/sel_bw.py in <lambda>(bw)
209 gwr_func = lambda bw: getDiag[self.criterion](
210 GWR(self.coords, self.y, self.X_loc, bw, family=self.family,
--> 211 kernel=self.kernel, fixed=self.fixed, constant=self.constant).fit())
212 if self.search == 'golden_section':
213 a,c = self._init_section(self.X_glob, self.X_loc, self.coords,
~/miniconda3/lib/python3.6/site-packages/PySAL-1.14.3-py3.6.egg/pysal/contrib/gwr/gwr.py in fit(self, ini_params, tol, max_iter, solve)
259 wi = self.W[i].reshape((-1,1))
260 rslt = iwls(self.y, self.X, self.family, self.offset, None,
--> 261 ini_params, tol, max_iter, wi=wi)
262 params[i,:] = rslt[0].T
263 predy[i] = rslt[1][i]
~/miniconda3/lib/python3.6/site-packages/PySAL-1.14.3-py3.6.egg/pysal/contrib/glm/iwls.py in iwls(y, x, family, offset, y_fix, ini_betas, tol, max_iter, wi)
74 n_betas = _compute_betas(wz, wx)
75 else:
---> 76 n_betas, xtx_inv_xt = _compute_betas_gwr(wz, wx, wi)
77 v = spdot(x, n_betas)
78 mu = family.fitted(v)
~/miniconda3/lib/python3.6/site-packages/PySAL-1.14.3-py3.6.egg/pysal/contrib/glm/iwls.py in _compute_betas_gwr(y, x, wi)
32 xT = (x * wi).T
33 xtx = np.dot(xT, x)
---> 34 xtx_inv = la.inv(xtx)
35 xtx_inv_xt = np.dot(xtx_inv, xT)
36 betas = np.dot(xtx_inv_xt, y)
~/miniconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in inv(a)
511 signature = 'D->D' if isComplexType(t) else 'd->d'
512 extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
--> 513 ainv = _umath_linalg.inv(a, signature=signature, extobj=extobj)
514 return wrap(ainv.astype(result_t, copy=False))
515
~/miniconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in _raise_linalgerror_singular(err, flag)
88
89 def _raise_linalgerror_singular(err, flag):
---> 90 raise LinAlgError("Singular matrix")
91
92 def _raise_linalgerror_nonposdef(err, flag):
LinAlgError: Singular matrix
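A guess at the cause, for what it's worth: "Singular matrix" means the local X'WX cannot be inverted, which typically happens when a column is (locally) constant or when dummy columns are collinear with the constant that GWR adds. A hedged sketch of how one might set up the categorical variable and check for collinearity first (variable names as above; using get_dummies instead of factorize is my assumption):
import numpy as np
import pandas as pd

built_year = re.built_year.values.reshape((-1, 1))
# one-hot encode but drop one level, since GWR adds its own constant
condition_dummies = pd.get_dummies(re.condition, drop_first=True).values

X = np.hstack([built_year, condition_dummies]).astype(float)
Y = re.price.values.reshape((-1, 1))

# if the rank is below the number of columns the global fit is already
# singular; locally it can still fail when a category barely varies inside
# a kernel bandwidth, so rare categories may need to be merged or dropped
print(np.linalg.matrix_rank(X), X.shape[1])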
I'm running Pandas 0.17.1 and Python 2.7.11, and I've got a pandas dataframe, shown below, with 3311 rows and 7 columns. It's fairly sparse, as each column represents a sensor sampling at a different rate (anywhere from 1 Hz to 32 Hz).
When I go to plot it using df.plot() I get a long traceback (below) with a few MemoryErrors sprinkled in.
MemoryError Traceback (most recent call last)
<ipython-input-17-4f76bcc16520> in <module>()
----> 1 df.plot()
C:\Anaconda2\lib\site-packages\pandas\tools\plotting.pyc in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
3669 fontsize=fontsize, colormap=colormap, table=table,
3670 yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 3671 sort_columns=sort_columns, **kwds)
3672 __call__.__doc__ = plot_frame.__doc__
3673
C:\Anaconda2\lib\site-packages\pandas\tools\plotting.pyc in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2554 yerr=yerr, xerr=xerr,
2555 secondary_y=secondary_y, sort_columns=sort_columns,
-> 2556 **kwds)
2557
2558
C:\Anaconda2\lib\site-packages\pandas\tools\plotting.pyc in _plot(data, x, y, subplots, ax, kind, **kwds)
2382 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
2383
-> 2384 plot_obj.generate()
2385 plot_obj.draw()
2386 return plot_obj.result
C:\Anaconda2\lib\site-packages\pandas\tools\plotting.pyc in generate(self)
991
992 for ax in self.axes:
--> 993 self._post_plot_logic_common(ax, self.data)
994 self._post_plot_logic(ax, self.data)
995
C:\Anaconda2\lib\site-packages\pandas\tools\plotting.pyc in _post_plot_logic_common(self, ax, data)
1119 ax.set_xticklabels(xticklabels)
1120 self._apply_axis_properties(ax.xaxis, rot=self.rot,
-> 1121 fontsize=self.fontsize)
1122 self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize)
1123 elif self.orientation == 'horizontal':
C:\Anaconda2\lib\site-packages\pandas\tools\plotting.pyc in _apply_axis_properties(self, axis, rot, fontsize)
1167
1168 def _apply_axis_properties(self, axis, rot=None, fontsize=None):
-> 1169 labels = axis.get_majorticklabels() + axis.get_minorticklabels()
1170 for label in labels:
1171 if rot is not None:
C:\Anaconda2\lib\site-packages\matplotlib\axis.pyc in get_majorticklabels(self)
1159 def get_majorticklabels(self):
1160 'Return a list of Text instances for the major ticklabels'
-> 1161 ticks = self.get_major_ticks()
1162 labels1 = [tick.label1 for tick in ticks if tick.label1On]
1163 labels2 = [tick.label2 for tick in ticks if tick.label2On]
C:\Anaconda2\lib\site-packages\matplotlib\axis.pyc in get_major_ticks(self, numticks)
1288 'get the tick instances; grow as necessary'
1289 if numticks is None:
-> 1290 numticks = len(self.get_major_locator()())
1291 if len(self.majorTicks) < numticks:
1292 # update the new tick label properties from the old
C:\Anaconda2\lib\site-packages\pandas\tseries\converter.pyc in __call__(self)
901 vmin, vmax = vmax, vmin
902 if self.isdynamic:
--> 903 locs = self._get_default_locs(vmin, vmax)
904 else: # pragma: no cover
905 base = self.base
C:\Anaconda2\lib\site-packages\pandas\tseries\converter.pyc in _get_default_locs(self, vmin, vmax)
882
883 if self.plot_obj.date_axis_info is None:
--> 884 self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq)
885
886 locator = self.plot_obj.date_axis_info
C:\Anaconda2\lib\site-packages\pandas\tseries\converter.pyc in _daily_finder(vmin, vmax, freq)
505 Period(ordinal=int(vmax), freq=freq))
506 span = vmax.ordinal - vmin.ordinal + 1
--> 507 dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq)
508 # Initialize the output
509 info = np.zeros(span,
C:\Anaconda2\lib\site-packages\pandas\tseries\period.pyc in __new__(cls, data, ordinal, freq, start, end, periods, copy, name, tz, **kwargs)
181 else:
182 data, freq = cls._generate_range(start, end, periods,
--> 183 freq, kwargs)
184 else:
185 ordinal, freq = cls._from_arraylike(data, freq, tz)
C:\Anaconda2\lib\site-packages\pandas\tseries\period.pyc in _generate_range(cls, start, end, periods, freq, fields)
195 raise ValueError('Can either instantiate from fields '
196 'or endpoints, but not both')
--> 197 subarr, freq = _get_ordinal_range(start, end, periods, freq)
198 elif field_count > 0:
199 subarr, freq = _range_from_fields(freq=freq, **fields)
C:\Anaconda2\lib\site-packages\pandas\tseries\period.pyc in _get_ordinal_range(start, end, periods, freq, mult)
992 dtype=np.int64)
993 else:
--> 994 data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
995
996 return data, freq
MemoryError:
Error in callback <function post_execute at 0x000000000A2304A8> (for post_execute):
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
C:\Anaconda2\lib\site-packages\matplotlib\pyplot.pyc in post_execute()
145 def post_execute():
146 if matplotlib.is_interactive():
--> 147 draw_all()
148
149 # IPython >= 2
C:\Anaconda2\lib\site-packages\matplotlib\_pylab_helpers.pyc in draw_all(cls, force)
148 for f_mgr in cls.get_all_fig_managers():
149 if force or f_mgr.canvas.figure.stale:
--> 150 f_mgr.canvas.draw_idle()
151
152 atexit.register(Gcf.destroy_all)
C:\Anaconda2\lib\site-packages\matplotlib\backend_bases.pyc in draw_idle(self, *args, **kwargs)
2024 if not self._is_idle_drawing:
2025 with self._idle_draw_cntx():
-> 2026 self.draw(*args, **kwargs)
2027
2028 def draw_cursor(self, event):
C:\Anaconda2\lib\site-packages\matplotlib\backends\backend_agg.pyc in draw(self)
472
473 try:
--> 474 self.figure.draw(self.renderer)
475 finally:
476 RendererAgg.lock.release()
C:\Anaconda2\lib\site-packages\matplotlib\artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
59 def draw_wrapper(artist, renderer, *args, **kwargs):
60 before(artist, renderer)
---> 61 draw(artist, renderer, *args, **kwargs)
62 after(artist, renderer)
63
C:\Anaconda2\lib\site-packages\matplotlib\figure.pyc in draw(self, renderer)
1131 dsu.sort(key=itemgetter(0))
1132 for zorder, a, func, args in dsu:
-> 1133 func(*args)
1134
1135 renderer.close_group('figure')
C:\Anaconda2\lib\site-packages\matplotlib\artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
59 def draw_wrapper(artist, renderer, *args, **kwargs):
60 before(artist, renderer)
---> 61 draw(artist, renderer, *args, **kwargs)
62 after(artist, renderer)
63
C:\Anaconda2\lib\site-packages\matplotlib\axes\_base.pyc in draw(self, renderer, inframe)
2302
2303 for zorder, a in dsu:
-> 2304 a.draw(renderer)
2305
2306 renderer.close_group('axes')
C:\Anaconda2\lib\site-packages\matplotlib\artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
59 def draw_wrapper(artist, renderer, *args, **kwargs):
60 before(artist, renderer)
---> 61 draw(artist, renderer, *args, **kwargs)
62 after(artist, renderer)
63
C:\Anaconda2\lib\site-packages\matplotlib\axis.pyc in draw(self, renderer, *args, **kwargs)
1104 renderer.open_group(__name__)
1105
-> 1106 ticks_to_draw = self._update_ticks(renderer)
1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw,
1108 renderer)
C:\Anaconda2\lib\site-packages\matplotlib\axis.pyc in _update_ticks(self, renderer)
947
948 interval = self.get_view_interval()
--> 949 tick_tups = [t for t in self.iter_ticks()]
950 if self._smart_bounds:
951 # handle inverted limits
C:\Anaconda2\lib\site-packages\matplotlib\axis.pyc in iter_ticks(self)
890 Iterate through all of the major and minor ticks.
891 """
--> 892 majorLocs = self.major.locator()
893 majorTicks = self.get_major_ticks(len(majorLocs))
894 self.major.formatter.set_locs(majorLocs)
C:\Anaconda2\lib\site-packages\pandas\tseries\converter.pyc in __call__(self)
901 vmin, vmax = vmax, vmin
902 if self.isdynamic:
--> 903 locs = self._get_default_locs(vmin, vmax)
904 else: # pragma: no cover
905 base = self.base
C:\Anaconda2\lib\site-packages\pandas\tseries\converter.pyc in _get_default_locs(self, vmin, vmax)
882
883 if self.plot_obj.date_axis_info is None:
--> 884 self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq)
885
886 locator = self.plot_obj.date_axis_info
C:\Anaconda2\lib\site-packages\pandas\tseries\converter.pyc in _daily_finder(vmin, vmax, freq)
505 Period(ordinal=int(vmax), freq=freq))
506 span = vmax.ordinal - vmin.ordinal + 1
--> 507 dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq)
508 # Initialize the output
509 info = np.zeros(span,
C:\Anaconda2\lib\site-packages\pandas\tseries\period.pyc in __new__(cls, data, ordinal, freq, start, end, periods, copy, name, tz, **kwargs)
181 else:
182 data, freq = cls._generate_range(start, end, periods,
--> 183 freq, kwargs)
184 else:
185 ordinal, freq = cls._from_arraylike(data, freq, tz)
C:\Anaconda2\lib\site-packages\pandas\tseries\period.pyc in _generate_range(cls, start, end, periods, freq, fields)
195 raise ValueError('Can either instantiate from fields '
196 'or endpoints, but not both')
--> 197 subarr, freq = _get_ordinal_range(start, end, periods, freq)
198 elif field_count > 0:
199 subarr, freq = _range_from_fields(freq=freq, **fields)
C:\Anaconda2\lib\site-packages\pandas\tseries\period.pyc in _get_ordinal_range(start, end, periods, freq, mult)
992 dtype=np.int64)
993 else:
--> 994 data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
995
996 return data, freq
MemoryError:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
C:\Anaconda2\lib\site-packages\IPython\core\formatters.pyc in __call__(self, obj)
335 pass
336 else:
--> 337 return printer(obj)
338 # Finally look for special method names
339 method = _safe_get_formatter_method(obj, self.print_method)
C:\Anaconda2\lib\site-packages\IPython\core\pylabtools.pyc in <lambda>(fig)
205
206 if 'png' in formats:
--> 207 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
208 if 'retina' in formats or 'png2x' in formats:
209 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
C:\Anaconda2\lib\site-packages\IPython\core\pylabtools.pyc in print_figure(fig, fmt, bbox_inches, **kwargs)
115
116 bytes_io = BytesIO()
--> 117 fig.canvas.print_figure(bytes_io, **kw)
118 data = bytes_io.getvalue()
119 if fmt == 'svg':
C:\Anaconda2\lib\site-packages\matplotlib\backend_bases.pyc in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs)
2176 orientation=orientation,
2177 dryrun=True,
-> 2178 **kwargs)
2179 renderer = self.figure._cachedRenderer
2180 bbox_inches = self.figure.get_tightbbox(renderer)
C:\Anaconda2\lib\site-packages\matplotlib\backends\backend_agg.pyc in print_png(self, filename_or_obj, *args, **kwargs)
525
526 def print_png(self, filename_or_obj, *args, **kwargs):
--> 527 FigureCanvasAgg.draw(self)
528 renderer = self.get_renderer()
529 original_dpi = renderer.dpi
C:\Anaconda2\lib\site-packages\matplotlib\backends\backend_agg.pyc in draw(self)
472
473 try:
--> 474 self.figure.draw(self.renderer)
475 finally:
476 RendererAgg.lock.release()
C:\Anaconda2\lib\site-packages\matplotlib\artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
59 def draw_wrapper(artist, renderer, *args, **kwargs):
60 before(artist, renderer)
---> 61 draw(artist, renderer, *args, **kwargs)
62 after(artist, renderer)
63
C:\Anaconda2\lib\site-packages\matplotlib\figure.pyc in draw(self, renderer)
1131 dsu.sort(key=itemgetter(0))
1132 for zorder, a, func, args in dsu:
-> 1133 func(*args)
1134
1135 renderer.close_group('figure')
C:\Anaconda2\lib\site-packages\matplotlib\artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
59 def draw_wrapper(artist, renderer, *args, **kwargs):
60 before(artist, renderer)
---> 61 draw(artist, renderer, *args, **kwargs)
62 after(artist, renderer)
63
C:\Anaconda2\lib\site-packages\matplotlib\axes\_base.pyc in draw(self, renderer, inframe)
2302
2303 for zorder, a in dsu:
-> 2304 a.draw(renderer)
2305
2306 renderer.close_group('axes')
C:\Anaconda2\lib\site-packages\matplotlib\artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
59 def draw_wrapper(artist, renderer, *args, **kwargs):
60 before(artist, renderer)
---> 61 draw(artist, renderer, *args, **kwargs)
62 after(artist, renderer)
63
C:\Anaconda2\lib\site-packages\matplotlib\axis.pyc in draw(self, renderer, *args, **kwargs)
1104 renderer.open_group(__name__)
1105
-> 1106 ticks_to_draw = self._update_ticks(renderer)
1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw,
1108 renderer)
C:\Anaconda2\lib\site-packages\matplotlib\axis.pyc in _update_ticks(self, renderer)
947
948 interval = self.get_view_interval()
--> 949 tick_tups = [t for t in self.iter_ticks()]
950 if self._smart_bounds:
951 # handle inverted limits
C:\Anaconda2\lib\site-packages\matplotlib\axis.pyc in iter_ticks(self)
890 Iterate through all of the major and minor ticks.
891 """
--> 892 majorLocs = self.major.locator()
893 majorTicks = self.get_major_ticks(len(majorLocs))
894 self.major.formatter.set_locs(majorLocs)
C:\Anaconda2\lib\site-packages\pandas\tseries\converter.pyc in __call__(self)
901 vmin, vmax = vmax, vmin
902 if self.isdynamic:
--> 903 locs = self._get_default_locs(vmin, vmax)
904 else: # pragma: no cover
905 base = self.base
C:\Anaconda2\lib\site-packages\pandas\tseries\converter.pyc in _get_default_locs(self, vmin, vmax)
882
883 if self.plot_obj.date_axis_info is None:
--> 884 self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq)
885
886 locator = self.plot_obj.date_axis_info
C:\Anaconda2\lib\site-packages\pandas\tseries\converter.pyc in _daily_finder(vmin, vmax, freq)
505 Period(ordinal=int(vmax), freq=freq))
506 span = vmax.ordinal - vmin.ordinal + 1
--> 507 dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq)
508 # Initialize the output
509 info = np.zeros(span,
C:\Anaconda2\lib\site-packages\pandas\tseries\period.pyc in __new__(cls, data, ordinal, freq, start, end, periods, copy, name, tz, **kwargs)
181 else:
182 data, freq = cls._generate_range(start, end, periods,
--> 183 freq, kwargs)
184 else:
185 ordinal, freq = cls._from_arraylike(data, freq, tz)
C:\Anaconda2\lib\site-packages\pandas\tseries\period.pyc in _generate_range(cls, start, end, periods, freq, fields)
195 raise ValueError('Can either instantiate from fields '
196 'or endpoints, but not both')
--> 197 subarr, freq = _get_ordinal_range(start, end, periods, freq)
198 elif field_count > 0:
199 subarr, freq = _range_from_fields(freq=freq, **fields)
C:\Anaconda2\lib\site-packages\pandas\tseries\period.pyc in _get_ordinal_range(start, end, periods, freq, mult)
992 dtype=np.int64)
993 else:
--> 994 data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
995
996 return data, freq
MemoryError:
<matplotlib.figure.Figure at 0xdcbd1d0>
Interestingly, if I instead use
import matplotlib.pyplot as plt
plt.plot(df)
Then I actually get a plot, though the only column that shows anything on the graph is the bvp column, and the x-axis strangely goes up to 1e10 (even though, as mentioned above, the data series is only 3311 samples long and only goes up to 51.718750 s).
Not too sure where to go from here. Any thoughts?
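In case it helps, the MemoryError comes from pandas' period-based tick locator: it tries to materialise a PeriodIndex covering the whole axis at the index's native (very fine) frequency, and the underlying np.arange blows up. A sketch of two workarounds, assuming df has a high-resolution datetime-like index:
import matplotlib.pyplot as plt

# 1) bypass pandas' period-based date converter and let matplotlib
#    pick the x-axis ticks itself
df.plot(x_compat=True)

# 2) or plot column by column with matplotlib, keeping the NaN gaps
#    of each sparse sensor column
fig, ax = plt.subplots()
for col in df.columns:
    ax.plot(df.index, df[col], label=col)
ax.legend()
plt.show()
If the index is a TimedeltaIndex, the 1e10 on the x-axis of the plt.plot version is just its nanosecond representation; plotting against df.index.total_seconds() would restore the 0-52 s scale.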