I am attaching the code . I am trying to replicate the code given in the link link. I am trying this code on my data. Below is the code and error that i am getting. Sorry for the bad formation of the question I tried to insert the code in a proper way but it doesnt seem to work for some commands.
search = {'algorithm': {'k-nn': {'n_neighbors': [1, 5]},
'SVM': {'kernel': {'linear': {'C': [0, 2]},
'rbf': {'gamma': [0, 1], 'C': [0, 10]},
'poly': {'degree': [2, 5], 'C': [0, 50], 'coef0': [0, 1]}
}
},
'naive-bayes': None,
'random-forest': {'n_estimators': [10, 30],
'max_features': [5, 20]}
}
}
#optunity.cross_validated(x=data, y=labels, num_folds=5)
def performance(x_train, y_train, x_test, y_test,
algorithm, n_neighbors=None, n_estimators=None, max_features=None,
kernel=None, C=None, gamma=None, degree=None, coef0=None):
# fit the model
if algorithm == 'k-nn':
model = KNeighborsClassifier(n_neighbors=int(n_neighbors))
model.fit(x_train, y_train)
elif algorithm == 'naive-bayes':
model = GaussianNB()
model.fit(x_train, y_train)
elif algorithm == 'random-forest':
model = RandomForestClassifier(n_estimators=int(n_estimators),
max_features=int(max_features))
model.fit(x_train, y_train)
# predict the test set
if algorithm == 'k-nn':
predictions = model.predict_proba(x_test)[:, 1]
else:
predictions = model.predict_proba(x_test)[:, 1]
return optunity.metrics.roc_auc(y_test, predictions, positive=True)
optimal_configuration, info, _ = optunity.maximize_structured(performance,
search_space=search,
num_evals=300)
This is the error message that I am getting
TypeError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in select(collection, indices)
76 try:
---> 77 return collection[indices, ...]
78 except IndexError: # caused by scipy.sparse in some versions
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1642 cache = self._item_cache
-> 1643 res = cache.get(item)
1644 if res is None:
TypeError: unhashable type: 'list'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 26770
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-153-3d5c9d0e2047> in <module>()
1 optimal_configuration, info, _ = optunity.maximize_structured(performance,
2 search_space=search,
----> 3 num_evals=300)
C:\ProgramData\Anaconda3\lib\site-packages\optunity\api.py in maximize_structured(f, search_space, num_evals, pmap)
368 solver = make_solver(**suggestion)
369 solution, details = optimize(solver, f, maximize=True, max_evals=num_evals,
--> 370 pmap=pmap, decoder=tree.decode)
371 return solution, details, suggestion
372
C:\ProgramData\Anaconda3\lib\site-packages\optunity\api.py in optimize(solver, func, maximize, max_evals, pmap, decoder)
243 time = timeit.default_timer()
244 try:
--> 245 solution, report = solver.optimize(f, maximize, pmap=pmap)
246 except fun.MaximumEvaluationsException:
247 # early stopping because maximum number of evaluations is reached
C:\ProgramData\Anaconda3\lib\site-packages\optunity\solvers\ParticleSwarm.py in optimize(self, f, maximize, pmap)
269 for g in range(self.num_generations):
270 fitnesses = pmap(evaluate, list(map(self.particle2dict, pop)))
--> 271 for part, fitness in zip(pop, fitnesses):
272 part.fitness = fit * util.score(fitness)
273 if not part.best or part.best_fitness < part.fitness:
C:\ProgramData\Anaconda3\lib\site-packages\optunity\solvers\ParticleSwarm.py in evaluate(d)
257 #functools.wraps(f)
258 def evaluate(d):
--> 259 return f(**d)
260
261 if maximize:
C:\ProgramData\Anaconda3\lib\site-packages\optunity\functions.py in wrapped_f(*args, **kwargs)
354 else:
355 wrapped_f.num_evals += 1
--> 356 return f(*args, **kwargs)
357 wrapped_f.num_evals = 0
358 return wrapped_f
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in wrapped_f(*args, **kwargs)
149 def wrapped_f(*args, **kwargs):
150 try:
--> 151 return f(*args, **kwargs)
152 except ConstraintViolation:
153 return default
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in wrapped_f(*args, **kwargs)
127 if violations:
128 raise ConstraintViolation(violations, *args, **kwargs)
--> 129 return f(*args, **kwargs)
130 wrapped_f.constraints = constraints
131 return wrapped_f
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in func(*args, **kwargs)
264 #functions.wraps(f)
265 def func(*args, **kwargs):
--> 266 return f(*args, **kwargs)
267 return func
268
C:\ProgramData\Anaconda3\lib\site-packages\optunity\search_spaces.py in wrapped(**kwargs)
324 def wrapped(**kwargs):
325 decoded = self.decode(kwargs)
--> 326 return f(**decoded)
327 return wrapped
328
C:\ProgramData\Anaconda3\lib\site-packages\optunity\functions.py in wrapped_f(*args, **kwargs)
299 value = wrapped_f.call_log.get(*args, **kwargs)
300 if value is None:
--> 301 value = f(*args, **kwargs)
302 wrapped_f.call_log.insert(value, *args, **kwargs)
303 return value
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in __call__(self, *args, **kwargs)
396 for i in range(self.num_folds)
397 if not i == fold]))
--> 398 kwargs['x_train'] = select(self.x, rows_train)
399 kwargs['x_test'] = select(self.x, rows_test)
400 if not self.y is None: # dealing with a supervised algorithm
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in select(collection, indices)
82 indexset = set(indices)
83 return collection.zipWithIndex().filter(lambda x: x[1] in indexset).map(lambda x: x[0])
---> 84 return [collection[i] for i in indices]
85
86
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in <listcomp>(.0)
82 indexset = set(indices)
83 return collection.zipWithIndex().filter(lambda x: x[1] in indexset).map(lambda x: x[0])
---> 84 return [collection[i] for i in indices]
85
86
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
1966 def _getitem_column(self, key):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
1973 # duplicate columns & possible reduce dimensionality
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 26770
So I got the answer.The format of input data set needed to be in array and the label as a boolean list.
Related
I have data, which has 2 input columns and 42 output columns. Here is what my code looks like:
data_columns=["value","average"]
prediction_columns=[]
for i in range(43):
prediction_columns.append("s"+str(i))
from keras.preprocessing.sequence import TimeseriesGenerator
n_input = 24*4 #how many samples/rows/timesteps to look in the past in order to forecast the next sample
n_features= len(prediction_columns)#X_train.shape[1] # how many predictors/Xs/features we have to predict y
b_size = 7 # Number of timeseries samples in each batch
generator = TimeseriesGenerator(columns[data_columns], columns[prediction_columns], length=n_input, batch_size=b_size)
print(generator[0][0].shape)
This fails with following:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /usr/lib/python3.10/site-packages/pandas/core/indexes/base.py:3621, in Index.get_loc(self, key, method, tolerance)
3620 try:
-> 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
File /usr/lib/python3.10/site-packages/pandas/_libs/index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File /usr/lib/python3.10/site-packages/pandas/_libs/index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 96
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Input In [38], in <cell line: 8>()
5 b_size = 7 # Number of timeseries samples in each batch
6 generator = TimeseriesGenerator(columns[data_columns], columns[prediction_columns], length=n_input, batch_size=b_size)
----> 8 print(generator[0][0].shape)
File /usr/lib/python3.10/site-packages/keras/preprocessing/sequence.py:176, in __getitem__(self, index)
172 rows = np.random.randint(
173 self.start_index, self.end_index + 1, size=self.batch_size
174 )
175 else:
--> 176 i = self.start_index + self.batch_size * self.stride * index
177 rows = np.arange(
178 i,
179 min(i + self.batch_size * self.stride, self.end_index + 1),
180 self.stride,
181 )
183 samples = np.array(
184 [
185 self.data[row - self.length : row : self.sampling_rate]
186 for row in rows
187 ]
188 )
File /usr/lib/python3.10/site-packages/keras/preprocessing/sequence.py:176, in <listcomp>(.0)
172 rows = np.random.randint(
173 self.start_index, self.end_index + 1, size=self.batch_size
174 )
175 else:
--> 176 i = self.start_index + self.batch_size * self.stride * index
177 rows = np.arange(
178 i,
179 min(i + self.batch_size * self.stride, self.end_index + 1),
180 self.stride,
181 )
183 samples = np.array(
184 [
185 self.data[row - self.length : row : self.sampling_rate]
186 for row in rows
187 ]
188 )
File /usr/lib/python3.10/site-packages/pandas/core/frame.py:3505, in DataFrame.__getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
File /usr/lib/python3.10/site-packages/pandas/core/indexes/base.py:3623, in Index.get_loc(self, key, method, tolerance)
3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
-> 3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
3628 self._check_indexing_error(key)
KeyError: 96
I thought that maybe I'm providing invalid column names, but both columns[data_columns].head() and columns[prediction_columns].head() execute without issues
Columns shape is (42749, 45)
What could be the source of the problem?
Problem went away after I converted dataframes to numpy
generator = TimeseriesGenerator(work[data_columns].to_numpy(), work[prediction_columns].to_numpy(), length=n_input, batch_size=b_size)
preprocess_train ran successfully.
Has anyone encountered similar KeyError when using preprocess_test / ktrain (0.18.5)?
preprocessing test...
language: en
test sequence lengths:
mean : 608
95percentile : 2102
99percentile : 4783
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
in
----> 1 val = t.preprocess_test(X_test, y_test)
~/.local/lib/python3.6/site-packages/ktrain/text/preprocessor.py in preprocess_test(self, texts, y, verbose)
1172 """
1173 self.check_trained()
-> 1174 return self.preprocess_train(texts, y=y, mode='test', verbose=verbose)
1175
1176
~/.local/lib/python3.6/site-packages/ktrain/text/preprocessor.py in preprocess_train(self, texts, y, mode, verbose)
1142 TransformerDataset if self.use_with_learner = True else tf.Dataset
1143 """
-> 1144 tseq = super().preprocess_train(texts, y=y, mode=mode, verbose=verbose)
1145 if self.use_with_learner: return tseq
1146 tseq.batch_size = self.batch_size
~/.local/lib/python3.6/site-packages/ktrain/text/preprocessor.py in preprocess_train(self, texts, y, mode, verbose)
910 elif y is None:
911 y = np.array([1] * len(texts))
--> 912 y = self._transform_y(y, verbose=verbose)
913
914 # convert examples
~/.local/lib/python3.6/site-packages/ktrain/text/preprocessor.py in _transform_y(self, y_data, verbose)
505
506 # check for errors and warnings
--> 507 if not isinstance(y_data[0], str) and len(y_data.shape) ==1 and not self.get_classes():
508 if verbose:
509 warnings.warn('Task is being treated as TEXT REGRESSION because ' +\
~/.local/lib/python3.6/site-packages/pandas/core/series.py in __getitem__(self, key)
869 key = com.apply_if_callable(key, self)
870 try:
--> 871 result = self.index.get_value(self, key)
872
873 if not is_scalar(result):
~/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4403 k = self._convert_scalar_indexer(k, kind="getitem")
4404 try:
-> 4405 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4406 except KeyError as e1:
4407 if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 0
It looks like you're supplying a pandas Series to preprocess_test instead of NumPy arrays or Python lists. If X_test and y_test are pandas Series, supply X_test.values and y_test.values to preprocess_test.
I have 400 columns and I am trying to do min-max normalization (row-wise). For the first 200 points I want to do min-max normalization and scale it between 0 and 500 and do the same for the next two hundred points but scale it between 0 and 10.
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(500,1000,size=(5, 400)))
def normalize(ds,value):
normalizedds = []
normalizedds.extend((ds[:value] - np.min(ds[:value])) / np.max(ds[:value] - np.min(ds[:value])) * 500)
normalizedds.extend(ds[value:value*2] / np.max(ds[value:value*2]) * 10)
return normalizedds
normalizeddsList = pd.DataFrame.from_records(df.apply(normalize, value=200, axis=1))
I get the following error!
ValueError Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5166 try:
-> 5167 return self._searchsorted_monotonic(label, side)
5168 except ValueError:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in _searchsorted_monotonic(self, label, side)
5127
-> 5128 raise ValueError("index must be monotonic increasing or decreasing")
5129
ValueError: index must be monotonic increasing or decreasing
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
in
----> 1 scaledCardList = pd.DataFrame.from_records(originalCardList.apply(scale, pointCount=200, axis=1))
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6926 kwds=kwds,
6927 )
-> 6928 return op.get_result()
6929
6930 def applymap(self, func):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in get_result(self)
184 return self.apply_raw()
185
--> 186 return self.apply_standard()
187
188 def apply_empty_result(self):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in apply_standard(self)
290
291 # compute the result using the series generator
--> 292 self.apply_series_generator()
293
294 # wrap results
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in apply_series_generator(self)
319 try:
320 for i, v in enumerate(series_gen):
--> 321 results[i] = self.f(v)
322 keys.append(v.name)
323 except Exception as e:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in f(x)
110
111 def f(x):
--> 112 return func(x, *args, **kwds)
113
114 else:
in scale(card, pointCount)
1 def scale(card, pointCount):
2 scaledCard = []
----> 3 scaledCard.extend((card[:pointCount] - np.min(card[:pointCount])) / np.max(card[:pointCount] - np.min(card[:pointCount])) * 10000)
4 scaledCard.extend(card[pointCount:pointCount*2] / np.max(card[pointCount:pointCount*2]) * 100)
5 return scaledCard
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py in getitem(self, key)
1111 key = check_bool_indexer(self.index, key)
1112
-> 1113 return self._get_with(key)
1114
1115 def _get_with(self, key):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py in _get_with(self, key)
1116 # other: fancy integer or otherwise
1117 if isinstance(key, slice):
-> 1118 indexer = self.index._convert_slice_indexer(key, kind="getitem")
1119 return self._get_values(indexer)
1120 elif isinstance(key, ABCDataFrame):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\numeric.py in _convert_slice_indexer(self, key, kind)
395
396 # translate to locations
--> 397 return self.slice_indexer(key.start, key.stop, key.step, kind=kind)
398
399 def _format_native_types(
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in slice_indexer(self, start, end, step, kind)
5032 slice(1, 3)
5033 """
-> 5034 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
5035
5036 # return a slice
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in slice_locs(self, start, end, step, kind)
5252 end_slice = None
5253 if end is not None:
-> 5254 end_slice = self.get_slice_bound(end, "right", kind)
5255 if end_slice is None:
5256 end_slice = len(self)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5168 except ValueError:
5169 # raise the original KeyError
-> 5170 raise err
5171
5172 if isinstance(slc, np.ndarray):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5162 # we need to look up the label
5163 try:
-> 5164 slc = self.get_loc(label)
5165 except KeyError as err:
5166 try:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\numeric.py in get_loc(self, key, method, tolerance)
477 except (TypeError, NotImplementedError):
478 pass
--> 479 return super().get_loc(key, method=method, tolerance=tolerance)
480
481 #cache_readonly
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Float64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Float64HashTable.get_item()
KeyError: (200.0, 'occurred at index 0')
I am encountering problems with running strategies based on European stocks using zipline. I have also raised the issue on Github, but so far I have not heard back about it.
Steps:
I downloaded data into CSV files (two files names abn.csv and aex.csv). The head of one looks like this:
date,open,high,low,close,volume,dividend,split
2017-01-02,21.100000381469727,21.295000076293945,20.94499969482422,21.274999618530273,407919,0,0
2017-01-03,21.334999084472656,22.06999969482422,21.334999084472656,21.950000762939453,1453872,0,0
2017-01-04,21.989999771118164,22.135000228881836,21.934999465942383,22.125,1169976,0,0
I modified the extension.py file to contain:
import pandas as pd
from zipline.data.bundles import register
from zipline.data.bundles.csvdir import csvdir_equities
start_session = pd.Timestamp('2017-01-02', tz='utc')
end_session = pd.Timestamp('2019-06-28', tz='utc')
# register the bundle
register(
'eu_stocks',
csvdir_equities(
['daily'],
'/path/',
),
calendar_name='XAMS', # Euronext Amsterdam
start_session=start_session,
end_session=end_session
)
I ingested the data
I ran the following strategy
def initialize(context):
set_benchmark('aex')
context.asset = symbol('abn')
def handle_data(context, data):
order_target_percent(context.asset, 0.5)
What results in the following error:
KeyError Traceback (most recent call last)
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1483315200000000000
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-33-9eaf08e4c73f> in <module>()
----> 1 get_ipython().run_cell_magic('zipline', '--start 2017-1-2 --end 2019-6-28 --capital-base 1050.0 --bundle eu_stocks -o out.pkl', "\ndef initialize(context):\n set_benchmark('aex')\n context.asset = symbol('abn')\n\ndef handle_data(context, data):\n order_target_percent(context.asset, 0.5)")
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2165 magic_arg_s = self.var_expand(line, stack_depth)
2166 with self.builtin_trap:
-> 2167 result = fn(magic_arg_s, cell)
2168 return result
2169
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/__main__.py in zipline_magic(line, cell)
309 '%s%%zipline' % ((cell or '') and '%'),
310 # don't use system exit and propogate errors to the caller
--> 311 standalone_mode=False,
312 )
313 except SystemExit as e:
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/core.py in main(self, args, prog_name, complete_var, standalone_mode, **extra)
695 try:
696 with self.make_context(prog_name, args, **extra) as ctx:
--> 697 rv = self.invoke(ctx)
698 if not standalone_mode:
699 return rv
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/core.py in invoke(self, ctx)
893 """
894 if self.callback is not None:
--> 895 return ctx.invoke(self.callback, **ctx.params)
896
897
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/core.py in invoke(*args, **kwargs)
533 with augment_usage_errors(self):
534 with ctx:
--> 535 return callback(*args, **kwargs)
536
537 def forward(*args, **kwargs):
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/decorators.py in new_func(*args, **kwargs)
15 """
16 def new_func(*args, **kwargs):
---> 17 return f(get_current_context(), *args, **kwargs)
18 return update_wrapper(new_func, f)
19
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/__main__.py in run(ctx, algofile, algotext, define, data_frequency, capital_base, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, blotter)
274 local_namespace=local_namespace,
275 environ=os.environ,
--> 276 blotter=blotter,
277 )
278
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/utils/run_algo.py in _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, data, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, environ, blotter)
167 equity_minute_reader=bundle_data.equity_minute_bar_reader,
168 equity_daily_reader=bundle_data.equity_daily_bar_reader,
--> 169 adjustment_reader=bundle_data.adjustment_reader,
170 )
171
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/data/data_portal.py in __init__(self, asset_finder, trading_calendar, first_trading_day, equity_daily_reader, equity_minute_reader, future_daily_reader, future_minute_reader, adjustment_reader, last_available_session, last_available_minute, minute_history_prefetch_length, daily_history_prefetch_length)
289 self._first_trading_day
290 )
--> 291 if self._first_trading_day is not None else (None, None)
292 )
293
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/trading_calendars/trading_calendar.py in open_and_close_for_session(self, session_label)
758 # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz # noqa
759 return (
--> 760 sched.at[session_label, 'market_open'].tz_localize(UTC),
761 sched.at[session_label, 'market_close'].tz_localize(UTC),
762 )
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/pandas/core/indexing.py in __getitem__(self, key)
1867
1868 key = self._convert_key(key)
-> 1869 return self.obj._get_value(*key, takeable=self._takeable)
1870
1871 def __setitem__(self, key, value):
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/pandas/core/frame.py in _get_value(self, index, col, takeable)
1983
1984 try:
-> 1985 return engine.get_value(series._values, index)
1986 except (TypeError, ValueError):
1987
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
KeyError: Timestamp('2017-01-02 00:00:00+0000', tz='UTC')
I'm Trying to convert a column into category in order to perform a pivot_table operation.
I've tried the following:
user_item_df = user_item.pivot_table(index='msno',
columns='song_id',
values='interacted',
aggfunc='mean')
And I got this:
ValueError Traceback (most recent call last)
<ipython-input-76-a870ece1f3e8> in <module>
2 columns='song_id',
3 values='interacted',
----> 4 aggfunc='mean')
~/anaconda3/lib/python3.6/site-packages/dask/dataframe/core.py in pivot_table(self, index, columns, values, aggfunc)
3123 from .reshape import pivot_table
3124 return pivot_table(self, index=index, columns=columns, values=values,
-> 3125 aggfunc=aggfunc)
3126
3127 def to_records(self, index=False):
~/anaconda3/lib/python3.6/site-packages/dask/dataframe/reshape.py in pivot_table(df, index, columns, values, aggfunc)
190 raise ValueError("'columns' must be the name of an existing column")
191 if not is_categorical_dtype(df[columns]):
--> 192 raise ValueError("'columns' must be category dtype")
193 if not has_known_categories(df[columns]):
194 raise ValueError("'columns' must have known categories. Please use "
ValueError: 'columns' must be category dtype
So I've tried to convert the column:
user_item.song_id = user_item.song_id.astype('category')
But I got this when calling pivot_table:
ValueError Traceback (most recent call last)
<ipython-input-78-a870ece1f3e8> in <module>
2 columns='song_id',
3 values='interacted',
----> 4 aggfunc='mean')
~/anaconda3/lib/python3.6/site-packages/dask/dataframe/core.py in pivot_table(self, index, columns, values, aggfunc)
3123 from .reshape import pivot_table
3124 return pivot_table(self, index=index, columns=columns, values=values,
-> 3125 aggfunc=aggfunc)
3126
3127 def to_records(self, index=False):
~/anaconda3/lib/python3.6/site-packages/dask/dataframe/reshape.py in pivot_table(df, index, columns, values, aggfunc)
192 raise ValueError("'columns' must be category dtype")
193 if not has_known_categories(df[columns]):
--> 194 raise ValueError("'columns' must have known categories. Please use "
195 "`df[columns].cat.as_known()` beforehand to ensure "
196 "known categories")
ValueError: 'columns' must have known categories. Please use `df[columns].cat.as_known()` beforehand to ensure known categories
Then I tried:
user_item.song_id = user_item.song_id.astype('category').cat.as_known()
And I immediately got:
KeyError Traceback (most recent call last)
<timed exec> in <module>
~/anaconda3/lib/python3.6/site-packages/dask/dataframe/categorical.py in as_known(self, **kwargs)
187 if self.known:
188 return self._series
--> 189 categories = self._property_map('categories').unique().compute(**kwargs)
190 return self.set_categories(categories.values)
191
~/anaconda3/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
154 dask.base.compute
155 """
--> 156 (result,) = compute(self, traverse=False, **kwargs)
157 return result
158
~/anaconda3/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
395 keys = [x.__dask_keys__() for x in collections]
396 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 397 results = schedule(dsk, keys, **kwargs)
398 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
399
~/anaconda3/lib/python3.6/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2336 try:
2337 results = self.gather(packed, asynchronous=asynchronous,
-> 2338 direct=direct)
2339 finally:
2340 for f in futures.values():
~/anaconda3/lib/python3.6/site-packages/distributed/client.py in gather(self, futures, errors, maxsize, direct, asynchronous)
1660 return self.sync(self._gather, futures, errors=errors,
1661 direct=direct, local_worker=local_worker,
-> 1662 asynchronous=asynchronous)
1663
1664 #gen.coroutine
~/anaconda3/lib/python3.6/site-packages/distributed/client.py in sync(self, func, *args, **kwargs)
674 return future
675 else:
--> 676 return sync(self.loop, func, *args, **kwargs)
677
678 def __repr__(self):
~/anaconda3/lib/python3.6/site-packages/distributed/utils.py in sync(loop, func, *args, **kwargs)
275 e.wait(10)
276 if error[0]:
--> 277 six.reraise(*error[0])
278 else:
279 return result[0]
~/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
685 raise value.with_traceback(tb)
--> 686 raise value
687
688 else:
~/anaconda3/lib/python3.6/site-packages/distributed/utils.py in f()
260 if timeout is not None:
261 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262 result[0] = yield future
263 except Exception as exc:
264 error[0] = sys.exc_info()
~/anaconda3/lib/python3.6/site-packages/tornado/gen.py in run(self)
1131
1132 try:
-> 1133 value = future.result()
1134 except Exception:
1135 self.had_exception = True
~/anaconda3/lib/python3.6/site-packages/tornado/gen.py in run(self)
1139 if exc_info is not None:
1140 try:
-> 1141 yielded = self.gen.throw(*exc_info)
1142 finally:
1143 # Break up a reference to itself
~/anaconda3/lib/python3.6/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1501 six.reraise(type(exception),
1502 exception,
-> 1503 traceback)
1504 if errors == 'skip':
1505 bad_keys.add(key)
~/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
683 value = tp()
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
687
/home/pi/env/lib/python3.5/site-packages/dask/dataframe/core.py in apply_and_enforce()
KeyError: '_func'
And the output of my workers is:
Exception: KeyError('_func',)
distributed.worker - WARNING - Compute Failed
Function: execute_task
args: ((<function apply at 0x764b3c90>, <function unique at 0x6ef24a50>, [(<function apply_and_enforce at 0x6eeede88>, <function Accessor._delegate_property at 0x6ef28198>, [(<function apply_and_enforce at 0x6eeede88>, <methodcaller: astype>, [(<built-in function getitem>, (<function apply at 0x764b3c90>, <function partial_by_order at 0x762ebd20>, [ msno ... interacted
0 vDi/nHqBu7wb+DtI2Ix4TupWQatUEFR41mDC0c8Voh8= ... 1
1 3IGfhB6dtaYxEGm20yFtRxN7KoFZjzGJbXPSjsjW5cM= ... 1
2 4QugsKXr1pJXSBj6CbSYCF6O7QY2/MHGICUU16p3fig= ... 1
3 i4g6DQpmkTuRCS6/osUsQ8GSBJM8261is4Q04NDGRPk= ... 1
4 TTNNMisplhps4y5gdQ6rsv0++TIKOOIIZLz05W97vFU= ... 1
5 sDR8kS+t73zE9QM8D03Zw3mVrsRXc0Nih/WRl02sfZI= ... 1
6 yiGYGWyGrCYHlMOtPv65urw9RfdH43PNGzu8TRaO+m8= ... 1
7 7lXXPZLRbAPWE5ILi2BFQVEhYzPz9cwNvuzIVCuHfZY= ... 1
8 4clHF4wjaFgY6+nQWoXm1EEAvB
kwargs: {}
Exception: KeyError('_func',)
If anyone Know how to fix this issue it would helps me a lot.
Solved by putting the same version of dask-distributed dask-core across all the workers, scheduler and client.