KeyError: 0 when using preprocess_test

KeyError: 0 when using preprocess_test - python

preprocess_train ran successfully.
Has anyone encountered similar KeyError when using preprocess_test / ktrain (0.18.5)?
preprocessing test...
language: en
test sequence lengths:
mean : 608
95percentile : 2102
99percentile : 4783
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
in
----> 1 val = t.preprocess_test(X_test, y_test)
~/.local/lib/python3.6/site-packages/ktrain/text/preprocessor.py in preprocess_test(self, texts, y, verbose)
1172 """
1173 self.check_trained()
-> 1174 return self.preprocess_train(texts, y=y, mode='test', verbose=verbose)
1175
1176
~/.local/lib/python3.6/site-packages/ktrain/text/preprocessor.py in preprocess_train(self, texts, y, mode, verbose)
1142 TransformerDataset if self.use_with_learner = True else tf.Dataset
1143 """
-> 1144 tseq = super().preprocess_train(texts, y=y, mode=mode, verbose=verbose)
1145 if self.use_with_learner: return tseq
1146 tseq.batch_size = self.batch_size
~/.local/lib/python3.6/site-packages/ktrain/text/preprocessor.py in preprocess_train(self, texts, y, mode, verbose)
910 elif y is None:
911 y = np.array([1] * len(texts))
--> 912 y = self._transform_y(y, verbose=verbose)
913
914 # convert examples
~/.local/lib/python3.6/site-packages/ktrain/text/preprocessor.py in _transform_y(self, y_data, verbose)
505
506 # check for errors and warnings
--> 507 if not isinstance(y_data[0], str) and len(y_data.shape) ==1 and not self.get_classes():
508 if verbose:
509 warnings.warn('Task is being treated as TEXT REGRESSION because ' +\
~/.local/lib/python3.6/site-packages/pandas/core/series.py in __getitem__(self, key)
869 key = com.apply_if_callable(key, self)
870 try:
--> 871 result = self.index.get_value(self, key)
872
873 if not is_scalar(result):
~/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4403 k = self._convert_scalar_indexer(k, kind="getitem")
4404 try:
-> 4405 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4406 except KeyError as e1:
4407 if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 0

It looks like you're supplying a pandas Series to preprocess_test instead of NumPy arrays or Python lists. If X_test and y_test are pandas Series, supply X_test.values and y_test.values to preprocess_test.

Related

Python pd.read_sql dataframe KeyError

I have a sql query to pull in contact records. I am looping through each record and assigning the values to variables in python. It's working for all the values variible pairs except HMid. I can't for the life of me figure out why. When I print Updatedata HMid shows up fine as a number like this '106594451'.
Any guidance is much appreciated.
updateSQL = """ Select c.mobilephone
,l.HMid
,[firstname]
,[lastname]
,fullname
,[emailaddress1]
from contact c where and \ksl_communityid = '%s'
""" % (CommunityID)
Updatedata = pd.read_sql(updateSQL, cnxnCS1)
print(Updatedata)
for i in range(len(Updatedata)):
mobilephone = data.loc[i,'mobilephone']
firstname = data.loc[i,'firstname']
lastname = data.loc[i,'lastname']
fullname = data.loc[i,'fullname']
emailaddress1 = data.loc[i,'emailaddress1']
HMid= data.loc[i,'HMid']
Here is the error I am getting:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)get_loc(self, key, method, tolerance) 2645 try:
-> 2646 return self._engine.get_loc(key) 2647 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'HMid'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last) <ipython-input-143-13351626f5cd> in <module>
53 fullname = data.loc[i,'fullname']
54 emailaddress1 = data.loc[i,'emailaddress1']
---> 55 HMid = data.loc[i,'HMid']
56
57 print(HMid)
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in
__getitem__(self, key) 1759 except (KeyError, IndexError, AttributeError): 1760 pass
-> 1761 return self._getitem_tuple(key) 1762 else: 1763 # we by definition only have the 0th axis
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in
_getitem_tuple(self, tup) 1269 def _getitem_tuple(self, tup: Tuple): 1270 try:
-> 1271 return self._getitem_lowerdim(tup) 1272 except IndexingError: 1273 pass
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in
_getitem_lowerdim(self, tup) 1418 return section 1419 # This is an elided recursive call to iloc/loc/etc'
-> 1420 return getattr(section, self.name)[new_key] 1421 1422 raise IndexingError("not applicable")
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in
__getitem__(self, key) 1765 1766 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1767 return self._getitem_axis(maybe_callable, axis=axis) 1768 1769 def _is_scalar_access(self, key: Tuple):
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in
_getitem_axis(self, key, axis) 1962 # fall thru to straight lookup 1963 self._validate_key(key, axis)
-> 1964 return self._get_label(key, axis=axis) 1965 1966
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in
_get_label(self, label, axis)
618 # but will fail when the index is not present
619 # see GH5667
--> 620 return self.obj._xs(label, axis=axis)
621 elif isinstance(label, tuple) and isinstance(label[axis], slice):
622 raise IndexingError("no slices here, handle elsewhere")
~\Anaconda3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level) 3535 loc, new_index = self.index.get_loc_level(key, drop_level=drop_level) 3536 else:
-> 3537 loc = self.index.get_loc(key) 3538 3539 if isinstance(loc, np.ndarray):
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance) 2646 return self._engine.get_loc(key) 2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key)) 2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance) 2650 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'HMid'

Error when applying normalize function in python

I have 400 columns and I am trying to do min-max normalization (row-wise). For the first 200 points I want to do min-max normalization and scale it between 0 and 500 and do the same for the next two hundred points but scale it between 0 and 10.
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(500,1000,size=(5, 400)))
def normalize(ds,value):
normalizedds = []
normalizedds.extend((ds[:value] - np.min(ds[:value])) / np.max(ds[:value] - np.min(ds[:value])) * 500)
normalizedds.extend(ds[value:value*2] / np.max(ds[value:value*2]) * 10)
return normalizedds
normalizeddsList = pd.DataFrame.from_records(df.apply(normalize, value=200, axis=1))
I get the following error!
ValueError Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5166 try:
-> 5167 return self._searchsorted_monotonic(label, side)
5168 except ValueError:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in _searchsorted_monotonic(self, label, side)
5127
-> 5128 raise ValueError("index must be monotonic increasing or decreasing")
5129
ValueError: index must be monotonic increasing or decreasing
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
in
----> 1 scaledCardList = pd.DataFrame.from_records(originalCardList.apply(scale, pointCount=200, axis=1))
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6926 kwds=kwds,
6927 )
-> 6928 return op.get_result()
6929
6930 def applymap(self, func):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in get_result(self)
184 return self.apply_raw()
185
--> 186 return self.apply_standard()
187
188 def apply_empty_result(self):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in apply_standard(self)
290
291 # compute the result using the series generator
--> 292 self.apply_series_generator()
293
294 # wrap results
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in apply_series_generator(self)
319 try:
320 for i, v in enumerate(series_gen):
--> 321 results[i] = self.f(v)
322 keys.append(v.name)
323 except Exception as e:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in f(x)
110
111 def f(x):
--> 112 return func(x, *args, **kwds)
113
114 else:
in scale(card, pointCount)
1 def scale(card, pointCount):
2 scaledCard = []
----> 3 scaledCard.extend((card[:pointCount] - np.min(card[:pointCount])) / np.max(card[:pointCount] - np.min(card[:pointCount])) * 10000)
4 scaledCard.extend(card[pointCount:pointCount*2] / np.max(card[pointCount:pointCount*2]) * 100)
5 return scaledCard
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py in getitem(self, key)
1111 key = check_bool_indexer(self.index, key)
1112
-> 1113 return self._get_with(key)
1114
1115 def _get_with(self, key):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py in _get_with(self, key)
1116 # other: fancy integer or otherwise
1117 if isinstance(key, slice):
-> 1118 indexer = self.index._convert_slice_indexer(key, kind="getitem")
1119 return self._get_values(indexer)
1120 elif isinstance(key, ABCDataFrame):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\numeric.py in _convert_slice_indexer(self, key, kind)
395
396 # translate to locations
--> 397 return self.slice_indexer(key.start, key.stop, key.step, kind=kind)
398
399 def _format_native_types(
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in slice_indexer(self, start, end, step, kind)
5032 slice(1, 3)
5033 """
-> 5034 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
5035
5036 # return a slice
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in slice_locs(self, start, end, step, kind)
5252 end_slice = None
5253 if end is not None:
-> 5254 end_slice = self.get_slice_bound(end, "right", kind)
5255 if end_slice is None:
5256 end_slice = len(self)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5168 except ValueError:
5169 # raise the original KeyError
-> 5170 raise err
5171
5172 if isinstance(slc, np.ndarray):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5162 # we need to look up the label
5163 try:
-> 5164 slc = self.get_loc(label)
5165 except KeyError as err:
5166 try:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\numeric.py in get_loc(self, key, method, tolerance)
477 except (TypeError, NotImplementedError):
478 pass
--> 479 return super().get_loc(key, method=method, tolerance=tolerance)
480
481 #cache_readonly
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Float64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Float64HashTable.get_item()
KeyError: (200.0, 'occurred at index 0')

Issues with running trading strategies on European stocks using zipline

I am encountering problems with running strategies based on European stocks using zipline. I have also raised the issue on Github, but so far I have not heard back about it.
Steps:
I downloaded data into CSV files (two files names abn.csv and aex.csv). The head of one looks like this:
date,open,high,low,close,volume,dividend,split
2017-01-02,21.100000381469727,21.295000076293945,20.94499969482422,21.274999618530273,407919,0,0
2017-01-03,21.334999084472656,22.06999969482422,21.334999084472656,21.950000762939453,1453872,0,0
2017-01-04,21.989999771118164,22.135000228881836,21.934999465942383,22.125,1169976,0,0
I modified the extension.py file to contain:
import pandas as pd
from zipline.data.bundles import register
from zipline.data.bundles.csvdir import csvdir_equities
start_session = pd.Timestamp('2017-01-02', tz='utc')
end_session = pd.Timestamp('2019-06-28', tz='utc')
# register the bundle
register(
'eu_stocks',
csvdir_equities(
['daily'],
'/path/',
),
calendar_name='XAMS', # Euronext Amsterdam
start_session=start_session,
end_session=end_session
)
I ingested the data
I ran the following strategy
def initialize(context):
set_benchmark('aex')
context.asset = symbol('abn')
def handle_data(context, data):
order_target_percent(context.asset, 0.5)
What results in the following error:
KeyError Traceback (most recent call last)
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1483315200000000000
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-33-9eaf08e4c73f> in <module>()
----> 1 get_ipython().run_cell_magic('zipline', '--start 2017-1-2 --end 2019-6-28 --capital-base 1050.0 --bundle eu_stocks -o out.pkl', "\ndef initialize(context):\n set_benchmark('aex')\n context.asset = symbol('abn')\n\ndef handle_data(context, data):\n order_target_percent(context.asset, 0.5)")
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2165 magic_arg_s = self.var_expand(line, stack_depth)
2166 with self.builtin_trap:
-> 2167 result = fn(magic_arg_s, cell)
2168 return result
2169
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/__main__.py in zipline_magic(line, cell)
309 '%s%%zipline' % ((cell or '') and '%'),
310 # don't use system exit and propogate errors to the caller
--> 311 standalone_mode=False,
312 )
313 except SystemExit as e:
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/core.py in main(self, args, prog_name, complete_var, standalone_mode, **extra)
695 try:
696 with self.make_context(prog_name, args, **extra) as ctx:
--> 697 rv = self.invoke(ctx)
698 if not standalone_mode:
699 return rv
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/core.py in invoke(self, ctx)
893 """
894 if self.callback is not None:
--> 895 return ctx.invoke(self.callback, **ctx.params)
896
897
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/core.py in invoke(*args, **kwargs)
533 with augment_usage_errors(self):
534 with ctx:
--> 535 return callback(*args, **kwargs)
536
537 def forward(*args, **kwargs):
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/decorators.py in new_func(*args, **kwargs)
15 """
16 def new_func(*args, **kwargs):
---> 17 return f(get_current_context(), *args, **kwargs)
18 return update_wrapper(new_func, f)
19
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/__main__.py in run(ctx, algofile, algotext, define, data_frequency, capital_base, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, blotter)
274 local_namespace=local_namespace,
275 environ=os.environ,
--> 276 blotter=blotter,
277 )
278
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/utils/run_algo.py in _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, data, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, environ, blotter)
167 equity_minute_reader=bundle_data.equity_minute_bar_reader,
168 equity_daily_reader=bundle_data.equity_daily_bar_reader,
--> 169 adjustment_reader=bundle_data.adjustment_reader,
170 )
171
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/data/data_portal.py in __init__(self, asset_finder, trading_calendar, first_trading_day, equity_daily_reader, equity_minute_reader, future_daily_reader, future_minute_reader, adjustment_reader, last_available_session, last_available_minute, minute_history_prefetch_length, daily_history_prefetch_length)
289 self._first_trading_day
290 )
--> 291 if self._first_trading_day is not None else (None, None)
292 )
293
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/trading_calendars/trading_calendar.py in open_and_close_for_session(self, session_label)
758 # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz # noqa
759 return (
--> 760 sched.at[session_label, 'market_open'].tz_localize(UTC),
761 sched.at[session_label, 'market_close'].tz_localize(UTC),
762 )
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/pandas/core/indexing.py in __getitem__(self, key)
1867
1868 key = self._convert_key(key)
-> 1869 return self.obj._get_value(*key, takeable=self._takeable)
1870
1871 def __setitem__(self, key, value):
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/pandas/core/frame.py in _get_value(self, index, col, takeable)
1983
1984 try:
-> 1985 return engine.get_value(series._values, index)
1986 except (TypeError, ValueError):
1987
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
KeyError: Timestamp('2017-01-02 00:00:00+0000', tz='UTC')

How to shorten certain substrings in a DataFrame

I have a series of SKUs in a DataFrame: [35641, 265689494123, 36492, 56526246546, 26412...].
The problem is that the long barcodes (like 56526246546) in the DataFrame need to be truncated at certain points. The length over 5 should trigger the deletion process, which truncates like [7:12] in a list.
I tried using the following code without any prevail:
if df.loc[len(df['SKU']) > 5]:
df.loc[df['SKU'].df.slice(start=7,stop=12)]
I get following error messages:
KeyError Traceback (most recent call last)
c:\users\User\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index_class_helper.pxi in pandas._libs.index.Int64Engine._check_type()
KeyError: True
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-64-cea7b4ca2640> in <module>
1 #g[:] = (elem[:12] for elem in g)
----> 2 if df.loc[len(df['SKU']) > 5]:
3 df.loc[df['SKU'].df.slice(start=7,stop=12)]
c:\users\User\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1498
1499 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1500 return self._getitem_axis(maybe_callable, axis=axis)
1501
1502 def _is_scalar_access(self, key):
c:\users\User\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1911 # fall thru to straight lookup
1912 self._validate_key(key, axis)
-> 1913 return self._get_label(key, axis=axis)
1914
1915
c:\users\User\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
139 raise IndexingError('no slices here, handle elsewhere')
140
--> 141 return self.obj._xs(label, axis=axis)
142
143 def _get_loc(self, key, axis=None):
c:\users\User\appdata\local\programs\python\python37\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3583 drop_level=drop_level)
3584 else:
-> 3585 loc = self.index.get_loc(key)
3586
3587 if isinstance(loc, np.ndarray):
c:\users\User\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index_class_helper.pxi in pandas._libs.index.Int64Engine._check_type()
KeyError: True
How do I fix this code?
P.S Some of the error messages seem to be popping up due to the fact that I've added the code BEFORE converting the dict into a DataFrame.

According to the output you want, I think you can use:
df['SKU'] = df['SKU'].apply(lambda x: int(str(x)[6:11]) if len(str(x)) > 5 else x)
Output:
SKU
0 35641
1 49412
2 36492
3 46546
4 26412

Here is my suggestion:
df.loc[:, 'SKU'] = df.loc[:, 'SKU'].astype(str).apply(lambda x: x[7:12] if len(x) > 5 else x)

optunity package python optunity.maximize_structured issue

I am attaching the code . I am trying to replicate the code given in the link link. I am trying this code on my data. Below is the code and error that i am getting. Sorry for the bad formation of the question I tried to insert the code in a proper way but it doesnt seem to work for some commands.
search = {'algorithm': {'k-nn': {'n_neighbors': [1, 5]},
'SVM': {'kernel': {'linear': {'C': [0, 2]},
'rbf': {'gamma': [0, 1], 'C': [0, 10]},
'poly': {'degree': [2, 5], 'C': [0, 50], 'coef0': [0, 1]}
}
},
'naive-bayes': None,
'random-forest': {'n_estimators': [10, 30],
'max_features': [5, 20]}
}
}
#optunity.cross_validated(x=data, y=labels, num_folds=5)
def performance(x_train, y_train, x_test, y_test,
algorithm, n_neighbors=None, n_estimators=None, max_features=None,
kernel=None, C=None, gamma=None, degree=None, coef0=None):
# fit the model
if algorithm == 'k-nn':
model = KNeighborsClassifier(n_neighbors=int(n_neighbors))
model.fit(x_train, y_train)
elif algorithm == 'naive-bayes':
model = GaussianNB()
model.fit(x_train, y_train)
elif algorithm == 'random-forest':
model = RandomForestClassifier(n_estimators=int(n_estimators),
max_features=int(max_features))
model.fit(x_train, y_train)
# predict the test set
if algorithm == 'k-nn':
predictions = model.predict_proba(x_test)[:, 1]
else:
predictions = model.predict_proba(x_test)[:, 1]
return optunity.metrics.roc_auc(y_test, predictions, positive=True)
optimal_configuration, info, _ = optunity.maximize_structured(performance,
search_space=search,
num_evals=300)
This is the error message that I am getting
TypeError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in select(collection, indices)
76 try:
---> 77 return collection[indices, ...]
78 except IndexError: # caused by scipy.sparse in some versions
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1642 cache = self._item_cache
-> 1643 res = cache.get(item)
1644 if res is None:
TypeError: unhashable type: 'list'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 26770
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-153-3d5c9d0e2047> in <module>()
1 optimal_configuration, info, _ = optunity.maximize_structured(performance,
2 search_space=search,
----> 3 num_evals=300)
C:\ProgramData\Anaconda3\lib\site-packages\optunity\api.py in maximize_structured(f, search_space, num_evals, pmap)
368 solver = make_solver(**suggestion)
369 solution, details = optimize(solver, f, maximize=True, max_evals=num_evals,
--> 370 pmap=pmap, decoder=tree.decode)
371 return solution, details, suggestion
372
C:\ProgramData\Anaconda3\lib\site-packages\optunity\api.py in optimize(solver, func, maximize, max_evals, pmap, decoder)
243 time = timeit.default_timer()
244 try:
--> 245 solution, report = solver.optimize(f, maximize, pmap=pmap)
246 except fun.MaximumEvaluationsException:
247 # early stopping because maximum number of evaluations is reached
C:\ProgramData\Anaconda3\lib\site-packages\optunity\solvers\ParticleSwarm.py in optimize(self, f, maximize, pmap)
269 for g in range(self.num_generations):
270 fitnesses = pmap(evaluate, list(map(self.particle2dict, pop)))
--> 271 for part, fitness in zip(pop, fitnesses):
272 part.fitness = fit * util.score(fitness)
273 if not part.best or part.best_fitness < part.fitness:
C:\ProgramData\Anaconda3\lib\site-packages\optunity\solvers\ParticleSwarm.py in evaluate(d)
257 #functools.wraps(f)
258 def evaluate(d):
--> 259 return f(**d)
260
261 if maximize:
C:\ProgramData\Anaconda3\lib\site-packages\optunity\functions.py in wrapped_f(*args, **kwargs)
354 else:
355 wrapped_f.num_evals += 1
--> 356 return f(*args, **kwargs)
357 wrapped_f.num_evals = 0
358 return wrapped_f
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in wrapped_f(*args, **kwargs)
149 def wrapped_f(*args, **kwargs):
150 try:
--> 151 return f(*args, **kwargs)
152 except ConstraintViolation:
153 return default
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in wrapped_f(*args, **kwargs)
127 if violations:
128 raise ConstraintViolation(violations, *args, **kwargs)
--> 129 return f(*args, **kwargs)
130 wrapped_f.constraints = constraints
131 return wrapped_f
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in func(*args, **kwargs)
264 #functions.wraps(f)
265 def func(*args, **kwargs):
--> 266 return f(*args, **kwargs)
267 return func
268
C:\ProgramData\Anaconda3\lib\site-packages\optunity\search_spaces.py in wrapped(**kwargs)
324 def wrapped(**kwargs):
325 decoded = self.decode(kwargs)
--> 326 return f(**decoded)
327 return wrapped
328
C:\ProgramData\Anaconda3\lib\site-packages\optunity\functions.py in wrapped_f(*args, **kwargs)
299 value = wrapped_f.call_log.get(*args, **kwargs)
300 if value is None:
--> 301 value = f(*args, **kwargs)
302 wrapped_f.call_log.insert(value, *args, **kwargs)
303 return value
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in __call__(self, *args, **kwargs)
396 for i in range(self.num_folds)
397 if not i == fold]))
--> 398 kwargs['x_train'] = select(self.x, rows_train)
399 kwargs['x_test'] = select(self.x, rows_test)
400 if not self.y is None: # dealing with a supervised algorithm
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in select(collection, indices)
82 indexset = set(indices)
83 return collection.zipWithIndex().filter(lambda x: x[1] in indexset).map(lambda x: x[0])
---> 84 return [collection[i] for i in indices]
85
86
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in <listcomp>(.0)
82 indexset = set(indices)
83 return collection.zipWithIndex().filter(lambda x: x[1] in indexset).map(lambda x: x[0])
---> 84 return [collection[i] for i in indices]
85
86
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
1966 def _getitem_column(self, key):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
1973 # duplicate columns & possible reduce dimensionality
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 26770

So I got the answer.The format of input data set needed to be in array and the label as a boolean list.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

KeyError: 0 when using preprocess_test - python

It looks like you're supplying a pandas Series to preprocess_test instead of NumPy arrays or Python lists. If X_test and y_test are pandas Series, supply X_test.values and y_test.values to preprocess_test.

Related

Python pd.read_sql dataframe KeyError

Error when applying normalize function in python

Issues with running trading strategies on European stocks using zipline

How to shorten certain substrings in a DataFrame

optunity package python optunity.maximize_structured issue

Categories

Resources