I have data with 2 input columns and 43 output columns. Here is what my code looks like:
data_columns = ["value", "average"]
prediction_columns = []
for i in range(43):
    prediction_columns.append("s" + str(i))
from keras.preprocessing.sequence import TimeseriesGenerator

n_input = 24 * 4  # how many samples/rows/timesteps to look in the past in order to forecast the next sample
n_features = len(prediction_columns)  # X_train.shape[1] -- how many predictors/Xs/features we have to predict y
b_size = 7  # number of timeseries samples in each batch

generator = TimeseriesGenerator(columns[data_columns], columns[prediction_columns], length=n_input, batch_size=b_size)
print(generator[0][0].shape)
This fails with the following:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /usr/lib/python3.10/site-packages/pandas/core/indexes/base.py:3621, in Index.get_loc(self, key, method, tolerance)
3620 try:
-> 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
File /usr/lib/python3.10/site-packages/pandas/_libs/index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File /usr/lib/python3.10/site-packages/pandas/_libs/index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 96
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Input In [38], in <cell line: 8>()
5 b_size = 7 # Number of timeseries samples in each batch
6 generator = TimeseriesGenerator(columns[data_columns], columns[prediction_columns], length=n_input, batch_size=b_size)
----> 8 print(generator[0][0].shape)
File /usr/lib/python3.10/site-packages/keras/preprocessing/sequence.py:176, in __getitem__(self, index)
172 rows = np.random.randint(
173 self.start_index, self.end_index + 1, size=self.batch_size
174 )
175 else:
--> 176 i = self.start_index + self.batch_size * self.stride * index
177 rows = np.arange(
178 i,
179 min(i + self.batch_size * self.stride, self.end_index + 1),
180 self.stride,
181 )
183 samples = np.array(
184 [
185 self.data[row - self.length : row : self.sampling_rate]
186 for row in rows
187 ]
188 )
File /usr/lib/python3.10/site-packages/keras/preprocessing/sequence.py:176, in <listcomp>(.0)
172 rows = np.random.randint(
173 self.start_index, self.end_index + 1, size=self.batch_size
174 )
175 else:
--> 176 i = self.start_index + self.batch_size * self.stride * index
177 rows = np.arange(
178 i,
179 min(i + self.batch_size * self.stride, self.end_index + 1),
180 self.stride,
181 )
183 samples = np.array(
184 [
185 self.data[row - self.length : row : self.sampling_rate]
186 for row in rows
187 ]
188 )
File /usr/lib/python3.10/site-packages/pandas/core/frame.py:3505, in DataFrame.__getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
File /usr/lib/python3.10/site-packages/pandas/core/indexes/base.py:3623, in Index.get_loc(self, key, method, tolerance)
3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
-> 3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
3628 self._check_indexing_error(key)
KeyError: 96
I thought that maybe I was providing invalid column names, but both columns[data_columns].head() and columns[prediction_columns].head() execute without issue.
The shape of columns is (42749, 45).
What could be the source of the problem?
The problem went away after I converted the DataFrames to NumPy arrays:
generator = TimeseriesGenerator(columns[data_columns].to_numpy(), columns[prediction_columns].to_numpy(), length=n_input, batch_size=b_size)
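For the record, the cause is visible in the traceback: TimeseriesGenerator indexes its inputs with plain integer row positions, and on a pandas DataFrame a bare integer in square brackets is a column-label lookup, not a row lookup. The first row the generator requests sits at position n_input = 24 * 4 = 96, hence KeyError: 96. A minimal sketch of the difference (toy frame, names chosen for illustration):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.arange(10).reshape(5, 2), columns=["value", "average"])
df[0:2]           # a slice selects rows 0-1, so indexing seemed to work at first
df[2]             # a bare integer is treated as a column label -> KeyError: 2
df.to_numpy()[2]  # a NumPy array indexes by row position, as the generator expects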
Possibly a silly problem, but I'm stuck and I'd really appreciate someone's help. I'm trying to replicate the examples given in the walkthrough tutorial for trackpy (https://soft-matter.github.io/trackpy/dev/tutorial/walkthrough.html). I was successful up until the point where I had to fit the ensemble mean-squared displacement to a power law (In [34]):
plt.figure()
plt.ylabel(r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]')
plt.xlabel('lag time $t$');
tp.utils.fit_powerlaw(em)  # performs linear best fit in log space, plots
This is the error message I get. Can someone suggest a solution?
Thanks!
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:3621, in Index.get_loc(self, key, method, tolerance)
3620 try:
-> 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/index.pyx:142, in pandas._libs.index.IndexEngine.get_loc()
TypeError: '(slice(None, None, None), None)' is an invalid key
During handling of the above exception, another exception occurred:
InvalidIndexError Traceback (most recent call last)
Input In [178], in <cell line: 4>()
2 plt.ylabel(r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]')
3 plt.xlabel('lag time $t$');
----> 4 tp.utils.fit_powerlaw(em)
File ~/opt/anaconda3/lib/python3.9/site-packages/trackpy/utils.py:50, in fit_powerlaw(data, plot, **kwargs)
48 if plot:
49 from trackpy import plots
---> 50 plots.fit(data, fits, logx=True, logy=True, legend=False, **kwargs)
51 return values
File ~/opt/anaconda3/lib/python3.9/site-packages/trackpy/plots.py:50, in make_axes.<locals>.wrapper(*args, **kwargs)
47 # Delete legend keyword so remaining ones can be passed to plot().
48 legend = kwargs.pop('legend', False)
---> 50 result = func(*args, **kwargs)
52 if legend:
53 handles, labels = kwargs['ax'].get_legend_handles_labels()
File ~/opt/anaconda3/lib/python3.9/site-packages/trackpy/plots.py:652, in fit(data, fits, inverted_model, logx, logy, ax, **kwargs)
650 ax.set_yscale('log')
651 if not inverted_model:
--> 652 fitlines = ax.plot(fits.index, fits, **kwargs)
653 else:
654 fitlines = ax.plot(fits.reindex(data.dropna().index),
655 data.dropna(), **kwargs)
File ~/opt/anaconda3/lib/python3.9/site-packages/matplotlib/axes/_axes.py:1632, in Axes.plot(self, scalex, scaley, data, *args, **kwargs)
1390 """
1391 Plot y versus x as lines and/or markers.
1392
(...)
1629 (``'green'``) or hex strings (``'#008000'``).
1630 """
1631 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D)
-> 1632 lines = [*self._get_lines(*args, data=data, **kwargs)]
1633 for line in lines:
1634 self.add_line(line)
File ~/opt/anaconda3/lib/python3.9/site-packages/matplotlib/axes/_base.py:312, in _process_plot_var_args.__call__(self, data, *args, **kwargs)
310 this += args[0],
311 args = args[1:]
--> 312 yield from self._plot_args(this, kwargs)
File ~/opt/anaconda3/lib/python3.9/site-packages/matplotlib/axes/_base.py:488, in _process_plot_var_args._plot_args(self, tup, kwargs, return_kwargs)
486 if len(xy) == 2:
487 x = _check_1d(xy[0])
--> 488 y = _check_1d(xy[1])
489 else:
490 x, y = index_of(xy[-1])
File ~/opt/anaconda3/lib/python3.9/site-packages/matplotlib/cbook/__init__.py:1327, in _check_1d(x)
1321 with warnings.catch_warnings(record=True) as w:
1322 warnings.filterwarnings(
1323 "always",
1324 category=Warning,
1325 message='Support for multi-dimensional indexing')
-> 1327 ndim = x[:, None].ndim
1328 # we have definitely hit a pandas index or series object
1329 # cast to a numpy array.
1330 if len(w) > 0:
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py:3505, in DataFrame.__getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:3628, in Index.get_loc(self, key, method, tolerance)
3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
-> 3628 self._check_indexing_error(key)
3629 raise
3631 # GH#42269
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:5637, in Index._check_indexing_error(self, key)
5633 def _check_indexing_error(self, key):
5634 if not is_scalar(key):
5635 # if key is not a scalar, directly raise an error (the code below
5636 # would convert to numpy arrays and raise later any way) - GH29926
-> 5637 raise InvalidIndexError(key)
InvalidIndexError: (slice(None, None, None), None)
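Not a fix inside trackpy, but the traceback points at a workaround: matplotlib's _check_1d calls x[:, None] on the fits table that trackpy passes to ax.plot, and recent pandas versions reject that kind of multi-dimensional indexing, hence the InvalidIndexError. You can let fit_powerlaw compute the fit without plotting and draw the figure yourself. A sketch, assuming em is the ensemble MSD from the tutorial and that fit_powerlaw returns a one-row table with columns n (exponent) and A (prefactor), as in the trackpy docs:

import matplotlib.pyplot as plt
import trackpy as tp

values = tp.utils.fit_powerlaw(em, plot=False)   # fit only, skip trackpy's plotting
n, A = values['n'].iloc[0], values['A'].iloc[0]

ax = em.plot(style='o', logx=True, logy=True, legend=False)  # measured MSD
ax.plot(em.index, A * em.index ** n, '-')                    # fitted power law
ax.set_ylabel(r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]')
ax.set_xlabel('lag time $t$')
plt.show()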
I have 400 columns and I am trying to do min-max normalization row-wise: for the first 200 points I want to min-max normalize and scale to the range 0-500, and do the same for the next two hundred points but scale to the range 0-10.
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randint(500, 1000, size=(5, 400)))

def normalize(ds, value):
    normalizedds = []
    normalizedds.extend((ds[:value] - np.min(ds[:value])) / np.max(ds[:value] - np.min(ds[:value])) * 500)
    normalizedds.extend(ds[value:value*2] / np.max(ds[value:value*2]) * 10)
    return normalizedds

normalizeddsList = pd.DataFrame.from_records(df.apply(normalize, value=200, axis=1))
I get the following error:
ValueError Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5166 try:
-> 5167 return self._searchsorted_monotonic(label, side)
5168 except ValueError:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in _searchsorted_monotonic(self, label, side)
5127
-> 5128 raise ValueError("index must be monotonic increasing or decreasing")
5129
ValueError: index must be monotonic increasing or decreasing
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
in
----> 1 scaledCardList = pd.DataFrame.from_records(originalCardList.apply(scale, pointCount=200, axis=1))
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6926 kwds=kwds,
6927 )
-> 6928 return op.get_result()
6929
6930 def applymap(self, func):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in get_result(self)
184 return self.apply_raw()
185
--> 186 return self.apply_standard()
187
188 def apply_empty_result(self):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in apply_standard(self)
290
291 # compute the result using the series generator
--> 292 self.apply_series_generator()
293
294 # wrap results
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in apply_series_generator(self)
319 try:
320 for i, v in enumerate(series_gen):
--> 321 results[i] = self.f(v)
322 keys.append(v.name)
323 except Exception as e:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in f(x)
110
111 def f(x):
--> 112 return func(x, *args, **kwds)
113
114 else:
in scale(card, pointCount)
1 def scale(card, pointCount):
2 scaledCard = []
----> 3 scaledCard.extend((card[:pointCount] - np.min(card[:pointCount])) / np.max(card[:pointCount] - np.min(card[:pointCount])) * 10000)
4 scaledCard.extend(card[pointCount:pointCount*2] / np.max(card[pointCount:pointCount*2]) * 100)
5 return scaledCard
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py in getitem(self, key)
1111 key = check_bool_indexer(self.index, key)
1112
-> 1113 return self._get_with(key)
1114
1115 def _get_with(self, key):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py in _get_with(self, key)
1116 # other: fancy integer or otherwise
1117 if isinstance(key, slice):
-> 1118 indexer = self.index._convert_slice_indexer(key, kind="getitem")
1119 return self._get_values(indexer)
1120 elif isinstance(key, ABCDataFrame):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\numeric.py in _convert_slice_indexer(self, key, kind)
395
396 # translate to locations
--> 397 return self.slice_indexer(key.start, key.stop, key.step, kind=kind)
398
399 def _format_native_types(
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in slice_indexer(self, start, end, step, kind)
5032 slice(1, 3)
5033 """
-> 5034 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
5035
5036 # return a slice
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in slice_locs(self, start, end, step, kind)
5252 end_slice = None
5253 if end is not None:
-> 5254 end_slice = self.get_slice_bound(end, "right", kind)
5255 if end_slice is None:
5256 end_slice = len(self)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5168 except ValueError:
5169 # raise the original KeyError
-> 5170 raise err
5171
5172 if isinstance(slc, np.ndarray):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5162 # we need to look up the label
5163 try:
-> 5164 slc = self.get_loc(label)
5165 except KeyError as err:
5166 try:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\numeric.py in get_loc(self, key, method, tolerance)
477 except (TypeError, NotImplementedError):
478 pass
--> 479 return super().get_loc(key, method=method, tolerance=tolerance)
480
481 @cache_readonly
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Float64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Float64HashTable.get_item()
KeyError: (200.0, 'occurred at index 0')
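The traceback gives the clue: inside the row function, ds[:value] is a label-based slice on the row's index (the original column labels), so pandas goes looking for the label 200.0 and fails when the index is not monotonic. Slicing by position with .iloc avoids the label lookup entirely. A sketch that keeps the post's scaling logic (the (x - min) / max(x - min) form is rewritten to the equivalent (x - min) / (max - min)):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randint(500, 1000, size=(5, 400)))

def normalize(ds, value):
    first = ds.iloc[:value]            # positions 0..value-1, regardless of labels
    second = ds.iloc[value:value * 2]  # positions value..2*value-1
    out = []
    out.extend((first - first.min()) / (first.max() - first.min()) * 500)
    out.extend(second / second.max() * 10)
    return out

normalizeddsList = pd.DataFrame.from_records(df.apply(normalize, value=200, axis=1))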
Hey, I have a very short question. I need to load data for my machine learning course, but it does not work for me and I have no idea why. I'm using Jupyter with Python 3.
My Code:
from sklearn.datasets import fetch_covtype
forest = fetch_covtype()
For my friend it works fine under the same conditions. I already tried updating scikit-learn with pip install -U scikit-learn, but it did not solve the problem. I hope somebody can help me.
It creates the following error:
UnboundLocalError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/covtype.py in fetch_covtype(data_home, download_if_missing, random_state, shuffle, return_X_y)
126 try:
--> 127 X, y
128 except NameError:
UnboundLocalError: local variable 'X' referenced before assignment
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-9-fb303a92b6ca> in <module>
----> 1 forest =fetch_covtype()
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/covtype.py in fetch_covtype(data_home, download_if_missing, random_state, shuffle, return_X_y)
127 X, y
128 except NameError:
--> 129 X, y = _refresh_cache([samples_path, targets_path], 9)
130 # TODO: Revert to the following two lines in v0.23
131 # X = joblib.load(samples_path)
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/base.py in _refresh_cache(files, compress)
928 msg = "sklearn.externals.joblib is deprecated in 0.21"
929 with warnings.catch_warnings(record=True) as warns:
--> 930 data = tuple([joblib.load(f) for f in files])
931
932 refresh_needed = any([str(x.message).startswith(msg) for x in warns])
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/base.py in <listcomp>(.0)
928 msg = "sklearn.externals.joblib is deprecated in 0.21"
929 with warnings.catch_warnings(record=True) as warns:
--> 930 data = tuple([joblib.load(f) for f in files])
931
932 refresh_needed = any([str(x.message).startswith(msg) for x in warns])
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in load(filename, mmap_mode)
603 return load_compatibility(fobj)
604
--> 605 obj = _unpickle(fobj, filename, mmap_mode)
606
607 return obj
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in _unpickle(fobj, filename, mmap_mode)
527 obj = None
528 try:
--> 529 obj = unpickler.load()
530 if unpickler.compat_mode:
531 warnings.warn("The file '%s' has been generated with a "
/opt/conda/lib/python3.7/pickle.py in load(self)
1083 raise EOFError
1084 assert isinstance(key, bytes_types)
-> 1085 dispatch[key[0]](self)
1086 except _Stop as stopinst:
1087 return stopinst.value
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in load_build(self)
353 if isinstance(array_wrapper, NDArrayWrapper):
354 self.compat_mode = True
--> 355 self.stack.append(array_wrapper.read(self))
356
357 # Be careful to register our new method.
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in read(self, unpickler)
196 array = self.read_mmap(unpickler)
197 else:
--> 198 array = self.read_array(unpickler)
199
200 # Manage array subclass case
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in read_array(self, unpickler)
147 read_size = int(read_count * self.dtype.itemsize)
148 data = _read_bytes(unpickler.file_handle,
--> 149 read_size, "array data")
150 array[i:i + read_count] = \
151 unpickler.np.frombuffer(data, dtype=self.dtype,
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle_utils.py in _read_bytes(fp, size, error_template)
241 if len(data) != size:
242 msg = "EOF: reading %s, expected %d bytes got %d"
--> 243 raise ValueError(msg % (error_template, size, len(data)))
244 else:
245 return data
ValueError: EOF: reading array data, expected 262144 bytes got 209661
This little piece of code is giving me more trouble than it should. I would appreciate any help. Thank you in advance for taking a look at this for me.
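The final ValueError ("EOF: reading array data, expected 262144 bytes got 209661") suggests the cached copy of the dataset on disk is truncated, most likely from an interrupted first download, so every later fetch_covtype() call fails while unpickling it. A sketch of the usual remedy, deleting the cache so the data is re-downloaded (the covertype subfolder is scikit-learn's default cache layout; adjust if you pass a custom data_home):

import shutil
from os.path import join
from sklearn.datasets import fetch_covtype, get_data_home

# Remove the truncated cached files, then force a fresh download.
shutil.rmtree(join(get_data_home(), "covertype"), ignore_errors=True)
forest = fetch_covtype(download_if_missing=True)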
I am trying to sum up price * volume over the previous dtt data points; dtt is a column I added that holds this window length (derived from a 22-day rolling average of volume). The formula I am trying to represent is:
vwap = sum(price_i * volume_i, i = 1..dtt) / sum(volume_i, i = 1..dtt)
Here is my code:
# Import necessary libraries
import numpy as np
import pandas as pd
import os
# Load SPY dataset
spy_data = pd.read_csv('SPY.csv', parse_dates=['Date'])
# Compute Daily Return
spy_data['daily_ret'] = (spy_data['Adj Close'] - (spy_data['Adj Close']).shift(1)) / ((spy_data['Adj Close']).shift(1)) * 100
spy_data['daily_ret'] = spy_data['daily_ret'].fillna(0.0)
# calculate Annualized Volatility
rsquare = (spy_data['daily_ret']) ** 2
spy_data['annualized_volatility'] = np.sqrt(rsquare.rolling(252).sum() / 251) * np.sqrt(252)
spy_data['annualized_volatility'] = spy_data['annualized_volatility'].fillna(0)
spy_shares = 889112600
# Calculate Days to Trade
spy_data['dtt'] = spy_shares / (spy_data['Volume'].rolling(22).sum()/22)
spy_data['dtt'] = spy_data['dtt'].fillna(1).astype(int)
# Calculate VWAP
#numerator is equal to the sum of the price * volume of the latest DTT
numerator = spy_data.loc[0:spy_data['dtt'], 'Adj Close'].sum()#*spy_data.loc[0:spy_data['dtt'], 'Volume']
#denominator = spy_data.loc[0:spy_data['dtt'], 'Volume']
#spy_data['vwap'] = numerator / denominator
print(spy_data)
I commented out the other lines because I was trying to work through the problem one step at a time. The price column that I need is Adj Close.
No matter what I try in terms of slicing, I keep getting an error. This is the current one:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-68-ade975138e12> in <module>
7 # Calculate VWAP
8 #numerator is equal to the sum of the price * volume of the latest DTT
----> 9 numerator = spy_data.loc[0:spy_data['dtt']].sum()#*spy_data[0:spy_data['dtt'], 'volume']
10 #denominator = spy_data.loc[0:spy_data['dtt'], 'Volume']
11 #spy_data['vwap'] = numerator / denominator
~/.local/lib/python3.6/site-packages/pandas/core/indexing.py in __getitem__(self, key)
1408
1409 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1410 return self._getitem_axis(maybe_callable, axis=axis)
1411
1412 def _is_scalar_access(self, key: Tuple):
~/.local/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1770 if isinstance(key, slice):
1771 self._validate_key(key, axis)
-> 1772 return self._get_slice_axis(key, axis=axis)
1773 elif com.is_bool_indexer(key):
1774 return self._getbool_axis(key, axis=axis)
~/.local/lib/python3.6/site-packages/pandas/core/indexing.py in _get_slice_axis(self, slice_obj, axis)
1438 labels = obj._get_axis(axis)
1439 indexer = labels.slice_indexer(
-> 1440 slice_obj.start, slice_obj.stop, slice_obj.step, kind=self.name
1441 )
1442
~/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in slice_indexer(self, start, end, step, kind)
5025 slice(1, 3)
5026 """
-> 5027 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
5028
5029 # return a slice
~/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in slice_locs(self, start, end, step, kind)
5245 end_slice = None
5246 if end is not None:
-> 5247 end_slice = self.get_slice_bound(end, "right", kind)
5248 if end_slice is None:
5249 end_slice = len(self)
~/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind)
5155 # we need to look up the label
5156 try:
-> 5157 slc = self.get_loc(label)
5158 except KeyError as err:
5159 try:
~/.local/lib/python3.6/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
377 except ValueError:
378 raise KeyError(key)
--> 379 return super().get_loc(key, method=method, tolerance=tolerance)
380
381 @Appender(_index_shared_docs["get_indexer"])
~/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2888 )
2889 try:
-> 2890 return self._engine.get_loc(key)
2891 except KeyError:
2892 return self._engine.get_loc(self._maybe_cast_indexer(key))
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
TypeError: '0 1
1 1
2 1
3 1
4 1
..
4694 9
4695 9
4696 10
4697 10
4698 11
Name: dtt, Length: 4699, dtype: int64' is an invalid key
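The TypeError at the bottom spells it out: spy_data['dtt'] is an entire Series, so 0:spy_data['dtt'] asks pandas to use a whole Series as a slice endpoint, which is not a valid key. Since dtt differs per row, each row needs its own trailing window. A sketch under two assumptions, a default RangeIndex from read_csv and a window ending at the current row:

def row_vwap(i):
    # Trailing window of length dtt for row i, clipped at the start of the data.
    dtt = spy_data.at[i, 'dtt']
    window = spy_data.iloc[max(0, i - dtt + 1):i + 1]
    return (window['Adj Close'] * window['Volume']).sum() / window['Volume'].sum()

spy_data['vwap'] = [row_vwap(i) for i in range(len(spy_data))]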
I am attaching the code below. I am trying to replicate the code given in the link on my own data. Below are the code and the error that I am getting. Sorry for the bad formatting of the question; I tried to insert the code properly, but it doesn't seem to work for some commands.
search = {'algorithm': {'k-nn': {'n_neighbors': [1, 5]},
'SVM': {'kernel': {'linear': {'C': [0, 2]},
'rbf': {'gamma': [0, 1], 'C': [0, 10]},
'poly': {'degree': [2, 5], 'C': [0, 50], 'coef0': [0, 1]}
}
},
'naive-bayes': None,
'random-forest': {'n_estimators': [10, 30],
'max_features': [5, 20]}
}
}
@optunity.cross_validated(x=data, y=labels, num_folds=5)
def performance(x_train, y_train, x_test, y_test,
                algorithm, n_neighbors=None, n_estimators=None, max_features=None,
                kernel=None, C=None, gamma=None, degree=None, coef0=None):
    # fit the model
    if algorithm == 'k-nn':
        model = KNeighborsClassifier(n_neighbors=int(n_neighbors))
        model.fit(x_train, y_train)
    elif algorithm == 'naive-bayes':
        model = GaussianNB()
        model.fit(x_train, y_train)
    elif algorithm == 'random-forest':
        model = RandomForestClassifier(n_estimators=int(n_estimators),
                                       max_features=int(max_features))
        model.fit(x_train, y_train)
    # predict the test set
    if algorithm == 'k-nn':
        predictions = model.predict_proba(x_test)[:, 1]
    else:
        predictions = model.predict_proba(x_test)[:, 1]
    return optunity.metrics.roc_auc(y_test, predictions, positive=True)

optimal_configuration, info, _ = optunity.maximize_structured(performance,
                                                              search_space=search,
                                                              num_evals=300)
This is the error message that I am getting:
TypeError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in select(collection, indices)
76 try:
---> 77 return collection[indices, ...]
78 except IndexError: # caused by scipy.sparse in some versions
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1642 cache = self._item_cache
-> 1643 res = cache.get(item)
1644 if res is None:
TypeError: unhashable type: 'list'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 26770
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-153-3d5c9d0e2047> in <module>()
1 optimal_configuration, info, _ = optunity.maximize_structured(performance,
2 search_space=search,
----> 3 num_evals=300)
C:\ProgramData\Anaconda3\lib\site-packages\optunity\api.py in maximize_structured(f, search_space, num_evals, pmap)
368 solver = make_solver(**suggestion)
369 solution, details = optimize(solver, f, maximize=True, max_evals=num_evals,
--> 370 pmap=pmap, decoder=tree.decode)
371 return solution, details, suggestion
372
C:\ProgramData\Anaconda3\lib\site-packages\optunity\api.py in optimize(solver, func, maximize, max_evals, pmap, decoder)
243 time = timeit.default_timer()
244 try:
--> 245 solution, report = solver.optimize(f, maximize, pmap=pmap)
246 except fun.MaximumEvaluationsException:
247 # early stopping because maximum number of evaluations is reached
C:\ProgramData\Anaconda3\lib\site-packages\optunity\solvers\ParticleSwarm.py in optimize(self, f, maximize, pmap)
269 for g in range(self.num_generations):
270 fitnesses = pmap(evaluate, list(map(self.particle2dict, pop)))
--> 271 for part, fitness in zip(pop, fitnesses):
272 part.fitness = fit * util.score(fitness)
273 if not part.best or part.best_fitness < part.fitness:
C:\ProgramData\Anaconda3\lib\site-packages\optunity\solvers\ParticleSwarm.py in evaluate(d)
257 @functools.wraps(f)
258 def evaluate(d):
--> 259 return f(**d)
260
261 if maximize:
C:\ProgramData\Anaconda3\lib\site-packages\optunity\functions.py in wrapped_f(*args, **kwargs)
354 else:
355 wrapped_f.num_evals += 1
--> 356 return f(*args, **kwargs)
357 wrapped_f.num_evals = 0
358 return wrapped_f
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in wrapped_f(*args, **kwargs)
149 def wrapped_f(*args, **kwargs):
150 try:
--> 151 return f(*args, **kwargs)
152 except ConstraintViolation:
153 return default
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in wrapped_f(*args, **kwargs)
127 if violations:
128 raise ConstraintViolation(violations, *args, **kwargs)
--> 129 return f(*args, **kwargs)
130 wrapped_f.constraints = constraints
131 return wrapped_f
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in func(*args, **kwargs)
264 @functions.wraps(f)
265 def func(*args, **kwargs):
--> 266 return f(*args, **kwargs)
267 return func
268
C:\ProgramData\Anaconda3\lib\site-packages\optunity\search_spaces.py in wrapped(**kwargs)
324 def wrapped(**kwargs):
325 decoded = self.decode(kwargs)
--> 326 return f(**decoded)
327 return wrapped
328
C:\ProgramData\Anaconda3\lib\site-packages\optunity\functions.py in wrapped_f(*args, **kwargs)
299 value = wrapped_f.call_log.get(*args, **kwargs)
300 if value is None:
--> 301 value = f(*args, **kwargs)
302 wrapped_f.call_log.insert(value, *args, **kwargs)
303 return value
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in __call__(self, *args, **kwargs)
396 for i in range(self.num_folds)
397 if not i == fold]))
--> 398 kwargs['x_train'] = select(self.x, rows_train)
399 kwargs['x_test'] = select(self.x, rows_test)
400 if not self.y is None: # dealing with a supervised algorithm
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in select(collection, indices)
82 indexset = set(indices)
83 return collection.zipWithIndex().filter(lambda x: x[1] in indexset).map(lambda x: x[0])
---> 84 return [collection[i] for i in indices]
85
86
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in <listcomp>(.0)
82 indexset = set(indices)
83 return collection.zipWithIndex().filter(lambda x: x[1] in indexset).map(lambda x: x[0])
---> 84 return [collection[i] for i in indices]
85
86
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
1966 def _getitem_column(self, key):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
1973 # duplicate columns & possible reduce dimensionality
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 26770
So I got the answer. The input data set needed to be a NumPy array and the labels a boolean list.
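To spell out why: optunity's cross-validation helper selects folds with plain integer indexing (collection[i], visible in the select() frames above), and on a DataFrame that is a column-label lookup, hence KeyError: 26770 when a row position is mistaken for a column name. A sketch of the conversion, with df and y as hypothetical names for the original feature DataFrame and label Series:

import optunity

data = df.to_numpy()        # 2-D array: integer indexing now selects rows
labels = (y == 1).tolist()  # boolean list of labels; the positive class here is an assumption

@optunity.cross_validated(x=data, y=labels, num_folds=5)
def performance(x_train, y_train, x_test, y_test, algorithm, **hyperparams):
    ...  # body as defined in the question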