Issues with running trading strategies on European stocks using zipline - python

I am encountering problems with running strategies based on European stocks using zipline. I have also raised the issue on Github, but so far I have not heard back about it.
Steps:
I downloaded data into CSV files (two files names abn.csv and aex.csv). The head of one looks like this:
date,open,high,low,close,volume,dividend,split
2017-01-02,21.100000381469727,21.295000076293945,20.94499969482422,21.274999618530273,407919,0,0
2017-01-03,21.334999084472656,22.06999969482422,21.334999084472656,21.950000762939453,1453872,0,0
2017-01-04,21.989999771118164,22.135000228881836,21.934999465942383,22.125,1169976,0,0
I modified the extension.py file to contain:
import pandas as pd
from zipline.data.bundles import register
from zipline.data.bundles.csvdir import csvdir_equities
start_session = pd.Timestamp('2017-01-02', tz='utc')
end_session = pd.Timestamp('2019-06-28', tz='utc')
# register the bundle
register(
'eu_stocks',
csvdir_equities(
['daily'],
'/path/',
),
calendar_name='XAMS', # Euronext Amsterdam
start_session=start_session,
end_session=end_session
)
I ingested the data
I ran the following strategy
def initialize(context):
set_benchmark('aex')
context.asset = symbol('abn')
def handle_data(context, data):
order_target_percent(context.asset, 0.5)
What results in the following error:
KeyError Traceback (most recent call last)
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1483315200000000000
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-33-9eaf08e4c73f> in <module>()
----> 1 get_ipython().run_cell_magic('zipline', '--start 2017-1-2 --end 2019-6-28 --capital-base 1050.0 --bundle eu_stocks -o out.pkl', "\ndef initialize(context):\n set_benchmark('aex')\n context.asset = symbol('abn')\n\ndef handle_data(context, data):\n order_target_percent(context.asset, 0.5)")
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2165 magic_arg_s = self.var_expand(line, stack_depth)
2166 with self.builtin_trap:
-> 2167 result = fn(magic_arg_s, cell)
2168 return result
2169
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/__main__.py in zipline_magic(line, cell)
309 '%s%%zipline' % ((cell or '') and '%'),
310 # don't use system exit and propogate errors to the caller
--> 311 standalone_mode=False,
312 )
313 except SystemExit as e:
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/core.py in main(self, args, prog_name, complete_var, standalone_mode, **extra)
695 try:
696 with self.make_context(prog_name, args, **extra) as ctx:
--> 697 rv = self.invoke(ctx)
698 if not standalone_mode:
699 return rv
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/core.py in invoke(self, ctx)
893 """
894 if self.callback is not None:
--> 895 return ctx.invoke(self.callback, **ctx.params)
896
897
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/core.py in invoke(*args, **kwargs)
533 with augment_usage_errors(self):
534 with ctx:
--> 535 return callback(*args, **kwargs)
536
537 def forward(*args, **kwargs):
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/click/decorators.py in new_func(*args, **kwargs)
15 """
16 def new_func(*args, **kwargs):
---> 17 return f(get_current_context(), *args, **kwargs)
18 return update_wrapper(new_func, f)
19
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/__main__.py in run(ctx, algofile, algotext, define, data_frequency, capital_base, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, blotter)
274 local_namespace=local_namespace,
275 environ=os.environ,
--> 276 blotter=blotter,
277 )
278
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/utils/run_algo.py in _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, data, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, environ, blotter)
167 equity_minute_reader=bundle_data.equity_minute_bar_reader,
168 equity_daily_reader=bundle_data.equity_daily_bar_reader,
--> 169 adjustment_reader=bundle_data.adjustment_reader,
170 )
171
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/zipline/data/data_portal.py in __init__(self, asset_finder, trading_calendar, first_trading_day, equity_daily_reader, equity_minute_reader, future_daily_reader, future_minute_reader, adjustment_reader, last_available_session, last_available_minute, minute_history_prefetch_length, daily_history_prefetch_length)
289 self._first_trading_day
290 )
--> 291 if self._first_trading_day is not None else (None, None)
292 )
293
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/trading_calendars/trading_calendar.py in open_and_close_for_session(self, session_label)
758 # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz # noqa
759 return (
--> 760 sched.at[session_label, 'market_open'].tz_localize(UTC),
761 sched.at[session_label, 'market_close'].tz_localize(UTC),
762 )
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/pandas/core/indexing.py in __getitem__(self, key)
1867
1868 key = self._convert_key(key)
-> 1869 return self.obj._get_value(*key, takeable=self._takeable)
1870
1871 def __setitem__(self, key, value):
~/anaconda3/envs/env_zipline2/lib/python3.5/site-packages/pandas/core/frame.py in _get_value(self, index, col, takeable)
1983
1984 try:
-> 1985 return engine.get_value(series._values, index)
1986 except (TypeError, ValueError):
1987
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
KeyError: Timestamp('2017-01-02 00:00:00+0000', tz='UTC')

Related

Got 504 Deadline Exceeded in Jupiter Notebook (Python) with Big query

I am trying to get a the result of a Google Bigquery query in a pandas dataframe (in Jupiter notebook).
But everytime I try to run the query I get a DeadlineExceeded: 504 Deadline Exceeded.
This happens not only for queries in my own BQ project but also for other projects.
I have tried a lot of option to run the query like in here: https://cloud.google.com/bigquery/docs/bigquery-storage-python-pandas
Anyone have a idea how to fix this?
Query:
%load_ext google.cloud.bigquery
%%bigquery tax_forms --use_bqstorage_api
SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`
---------------------------------------------------------------------------
_MultiThreadedRendezvous Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\grpc_helpers.py in error_remapped_callable(*args, **kwargs)
149 prefetch_first = getattr(callable_, "_prefetch_first_result_", True)
--> 150 return _StreamingResponseIterator(result, prefetch_first_result=prefetch_first)
151 except grpc.RpcError as exc:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\grpc_helpers.py in __init__(self, wrapped, prefetch_first_result)
72 if prefetch_first_result:
---> 73 self._stored_first_result = six.next(self._wrapped)
74 except TypeError:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\grpc\_channel.py in __next__(self)
415 def __next__(self):
--> 416 return self._next()
417
~\AppData\Local\Continuum\anaconda3\lib\site-packages\grpc\_channel.py in _next(self)
705 elif self._state.code is not None:
--> 706 raise self
707
_MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.DEADLINE_EXCEEDED
details = "Deadline Exceeded"
debug_error_string = "{"created":"#1597838569.388000000","description":"Error received from peer ipv4:172.217.168.202:443","file":"src/core/lib/surface/call.cc","file_line":1062,"grpc_message":"Deadline Exceeded","grpc_status":4}"
>
The above exception was the direct cause of the following exception:
DeadlineExceeded Traceback (most recent call last)
<ipython-input-2-4fdaec7219df> in <module>
----> 1 get_ipython().run_cell_magic('bigquery', 'tax_forms --use_bqstorage_api', 'SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`\n')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2357 with self.builtin_trap:
2358 args = (magic_arg_s, cell)
-> 2359 result = fn(*args, **kwargs)
2360 return result
2361
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\magics.py in _cell_magic(line, query)
589 )
590 else:
--> 591 result = query_job.to_dataframe(bqstorage_client=bqstorage_client)
592
593 if args.destination_var:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\job.py in to_dataframe(self, bqstorage_client, dtypes, progress_bar_type, create_bqstorage_client, date_as_object)
3381 progress_bar_type=progress_bar_type,
3382 create_bqstorage_client=create_bqstorage_client,
-> 3383 date_as_object=date_as_object,
3384 )
3385
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\table.py in to_dataframe(self, bqstorage_client, dtypes, progress_bar_type, create_bqstorage_client, date_as_object)
1726 progress_bar_type=progress_bar_type,
1727 bqstorage_client=bqstorage_client,
-> 1728 create_bqstorage_client=create_bqstorage_client,
1729 )
1730
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\table.py in to_arrow(self, progress_bar_type, bqstorage_client, create_bqstorage_client)
1544 record_batches = []
1545 for record_batch in self._to_arrow_iterable(
-> 1546 bqstorage_client=bqstorage_client
1547 ):
1548 record_batches.append(record_batch)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\table.py in _to_page_iterable(self, bqstorage_download, tabledata_list_download, bqstorage_client)
1433 ):
1434 if bqstorage_client is not None:
-> 1435 for item in bqstorage_download():
1436 yield item
1437 return
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\_pandas_helpers.py in _download_table_bqstorage(project_id, table, bqstorage_client, preserve_order, selected_fields, page_to_item)
723 # Call result() on any finished threads to raise any
724 # exceptions encountered.
--> 725 future.result()
726
727 try:
~\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\_base.py in result(self, timeout)
426 raise CancelledError()
427 elif self._state == FINISHED:
--> 428 return self.__get_result()
429
430 self._condition.wait(timeout)
~\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
~\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\thread.py in run(self)
55
56 try:
---> 57 result = self.fn(*self.args, **self.kwargs)
58 except BaseException as exc:
59 self.future.set_exception(exc)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\_pandas_helpers.py in _download_table_bqstorage_stream(download_state, bqstorage_client, session, stream, worker_queue, page_to_item)
591 rowstream = bqstorage_client.read_rows(position).rows(session)
592 else:
--> 593 rowstream = bqstorage_client.read_rows(stream.name).rows(session)
594
595 for page in rowstream.pages:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery_storage_v1\client.py in read_rows(self, name, offset, retry, timeout, metadata)
120 retry=retry,
121 timeout=timeout,
--> 122 metadata=metadata,
123 )
124 return reader.ReadRowsStream(
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery_storage_v1\gapic\big_query_read_client.py in read_rows(self, read_stream, offset, retry, timeout, metadata)
370
371 return self._inner_api_calls["read_rows"](
--> 372 request, retry=retry, timeout=timeout, metadata=metadata
373 )
374
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\gapic_v1\method.py in __call__(self, *args, **kwargs)
143 kwargs["metadata"] = metadata
144
--> 145 return wrapped_func(*args, **kwargs)
146
147
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\retry.py in retry_wrapped_func(*args, **kwargs)
284 sleep_generator,
285 self._deadline,
--> 286 on_error=on_error,
287 )
288
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\retry.py in retry_target(target, predicate, sleep_generator, deadline, on_error)
182 for sleep in sleep_generator:
183 try:
--> 184 return target()
185
186 # pylint: disable=broad-except
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\timeout.py in func_with_timeout(*args, **kwargs)
212 """Wrapped function that adds timeout."""
213 kwargs["timeout"] = next(timeouts)
--> 214 return func(*args, **kwargs)
215
216 return func_with_timeout
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\grpc_helpers.py in error_remapped_callable(*args, **kwargs)
150 return _StreamingResponseIterator(result, prefetch_first_result=prefetch_first)
151 except grpc.RpcError as exc:
--> 152 six.raise_from(exceptions.from_grpc_error(exc), exc)
153
154 return error_remapped_callable
~\AppData\Local\Continuum\anaconda3\lib\site-packages\six.py in raise_from(value, from_value)
DeadlineExceeded: 504 Deadline Exceeded
Let me know if you need to know more. Thanks in advance.
Rutger
It turned out to be a conflict between a Conda package and a pip packages.
I resolved it by reinstall all the packages.

Error plotting and saving figure with %matplotlib notebook

I'm trying to plot and save an interactive figure using matplotlib in Jupyter notebook using the following code:
%matplotlib notebook
plt.figure()
plt.imshow([[1, 2, 3],[4, 5, 6]])
plt.savefig('delete.png')
When I do this, I get the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/backend_bases.py in _wait_cursor_for_draw_cm(self)
2772 try:
-> 2773 self.set_cursor(cursors.WAIT)
2774 yield
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/backends/backend_webagg_core.py in set_cursor(self, cursor)
380 if cursor != self.cursor:
--> 381 self.canvas.send_event("cursor", cursor=cursor)
382 self.cursor = cursor
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/backends/backend_webagg_core.py in send_event(self, event_type, **kwargs)
345 def send_event(self, event_type, **kwargs):
--> 346 self.manager._send_event(event_type, **kwargs)
347
AttributeError: 'NoneType' object has no attribute '_send_event'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last) <ipython-input-8-de5d3bbf172d> in <module>
2 plt.figure()
3 plt.imshow([[1, 2, 3],[4, 5, 6]])
----> 4 plt.savefig('delete.png')
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/pyplot.py in savefig(*args, **kwargs)
721 def savefig(*args, **kwargs):
722 fig = gcf()
--> 723 res = fig.savefig(*args, **kwargs)
724 fig.canvas.draw_idle() # need this if 'transparent=True' to reset colors
725 return res
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/figure.py in savefig(self, fname, transparent, **kwargs)
2201 self.patch.set_visible(frameon)
2202
-> 2203 self.canvas.print_figure(fname, **kwargs)
2204
2205 if frameon:
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/backend_bases.py in
print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, **kwargs)
2091 orientation=orientation,
2092 bbox_inches_restore=_bbox_inches_restore,
-> 2093 **kwargs)
2094 finally:
2095 if bbox_inches and restore_bbox:
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args,
**kwargs)
512 }
513
--> 514 FigureCanvasAgg.draw(self)
515 if pil_kwargs is not None:
516 from PIL import Image
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/backends/backend_agg.py in draw(self)
390 with RendererAgg.lock, \
391 (self.toolbar._wait_cursor_for_draw_cm() if self.toolbar
--> 392 else nullcontext()):
393 self.figure.draw(self.renderer)
394 # A GUI class may be need to update a window using this draw, so
~/anaconda3/envs/asd/lib/python3.6/contextlib.py in __enter__(self)
79 def __enter__(self):
80 try:
---> 81 return next(self.gen)
82 except StopIteration:
83 raise RuntimeError("generator didn't yield") from None
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/backend_bases.py in _wait_cursor_for_draw_cm(self)
2774 yield
2775 finally:
-> 2776 self.set_cursor(self._lastCursor)
2777 else:
2778 yield
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/backends/backend_webagg_core.py in set_cursor(self, cursor)
379 def set_cursor(self, cursor):
380 if cursor != self.cursor:
--> 381 self.canvas.send_event("cursor", cursor=cursor)
382 self.cursor = cursor
383
~/anaconda3/envs/asd/lib/python3.6/site-packages/matplotlib/backends/backend_webagg_core.py in send_event(self, event_type, **kwargs)
344
345 def send_event(self, event_type, **kwargs):
--> 346 self.manager._send_event(event_type, **kwargs)
347
348
AttributeError: 'NoneType' object has no attribute '_send_event'
The figure itself renders fine when I get rid of the savefig command. Furthermore, both rendering and saving work fine when I use %matplotlib inline instead of %matplotlib notebook. What is causing this issue and how can I save figures when using %matplotlib notebook?

Error when applying normalize function in python

I have 400 columns and I am trying to do min-max normalization (row-wise). For the first 200 points I want to do min-max normalization and scale it between 0 and 500 and do the same for the next two hundred points but scale it between 0 and 10.
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(500,1000,size=(5, 400)))
def normalize(ds,value):
normalizedds = []
normalizedds.extend((ds[:value] - np.min(ds[:value])) / np.max(ds[:value] - np.min(ds[:value])) * 500)
normalizedds.extend(ds[value:value*2] / np.max(ds[value:value*2]) * 10)
return normalizedds
normalizeddsList = pd.DataFrame.from_records(df.apply(normalize, value=200, axis=1))
I get the following error!
ValueError Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5166 try:
-> 5167 return self._searchsorted_monotonic(label, side)
5168 except ValueError:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in _searchsorted_monotonic(self, label, side)
5127
-> 5128 raise ValueError("index must be monotonic increasing or decreasing")
5129
ValueError: index must be monotonic increasing or decreasing
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
in
----> 1 scaledCardList = pd.DataFrame.from_records(originalCardList.apply(scale, pointCount=200, axis=1))
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6926 kwds=kwds,
6927 )
-> 6928 return op.get_result()
6929
6930 def applymap(self, func):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in get_result(self)
184 return self.apply_raw()
185
--> 186 return self.apply_standard()
187
188 def apply_empty_result(self):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in apply_standard(self)
290
291 # compute the result using the series generator
--> 292 self.apply_series_generator()
293
294 # wrap results
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in apply_series_generator(self)
319 try:
320 for i, v in enumerate(series_gen):
--> 321 results[i] = self.f(v)
322 keys.append(v.name)
323 except Exception as e:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in f(x)
110
111 def f(x):
--> 112 return func(x, *args, **kwds)
113
114 else:
in scale(card, pointCount)
1 def scale(card, pointCount):
2 scaledCard = []
----> 3 scaledCard.extend((card[:pointCount] - np.min(card[:pointCount])) / np.max(card[:pointCount] - np.min(card[:pointCount])) * 10000)
4 scaledCard.extend(card[pointCount:pointCount*2] / np.max(card[pointCount:pointCount*2]) * 100)
5 return scaledCard
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py in getitem(self, key)
1111 key = check_bool_indexer(self.index, key)
1112
-> 1113 return self._get_with(key)
1114
1115 def _get_with(self, key):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py in _get_with(self, key)
1116 # other: fancy integer or otherwise
1117 if isinstance(key, slice):
-> 1118 indexer = self.index._convert_slice_indexer(key, kind="getitem")
1119 return self._get_values(indexer)
1120 elif isinstance(key, ABCDataFrame):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\numeric.py in _convert_slice_indexer(self, key, kind)
395
396 # translate to locations
--> 397 return self.slice_indexer(key.start, key.stop, key.step, kind=kind)
398
399 def _format_native_types(
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in slice_indexer(self, start, end, step, kind)
5032 slice(1, 3)
5033 """
-> 5034 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
5035
5036 # return a slice
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in slice_locs(self, start, end, step, kind)
5252 end_slice = None
5253 if end is not None:
-> 5254 end_slice = self.get_slice_bound(end, "right", kind)
5255 if end_slice is None:
5256 end_slice = len(self)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5168 except ValueError:
5169 # raise the original KeyError
-> 5170 raise err
5171
5172 if isinstance(slc, np.ndarray):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5162 # we need to look up the label
5163 try:
-> 5164 slc = self.get_loc(label)
5165 except KeyError as err:
5166 try:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\numeric.py in get_loc(self, key, method, tolerance)
477 except (TypeError, NotImplementedError):
478 pass
--> 479 return super().get_loc(key, method=method, tolerance=tolerance)
480
481 #cache_readonly
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Float64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Float64HashTable.get_item()
KeyError: (200.0, 'occurred at index 0')
​
​
​

How to convert column into category 'as_known()' with dask dataframe?

I'm Trying to convert a column into category in order to perform a pivot_table operation.
I've tried the following:
user_item_df = user_item.pivot_table(index='msno',
columns='song_id',
values='interacted',
aggfunc='mean')
And I got this:
ValueError Traceback (most recent call last)
<ipython-input-76-a870ece1f3e8> in <module>
2 columns='song_id',
3 values='interacted',
----> 4 aggfunc='mean')
~/anaconda3/lib/python3.6/site-packages/dask/dataframe/core.py in pivot_table(self, index, columns, values, aggfunc)
3123 from .reshape import pivot_table
3124 return pivot_table(self, index=index, columns=columns, values=values,
-> 3125 aggfunc=aggfunc)
3126
3127 def to_records(self, index=False):
~/anaconda3/lib/python3.6/site-packages/dask/dataframe/reshape.py in pivot_table(df, index, columns, values, aggfunc)
190 raise ValueError("'columns' must be the name of an existing column")
191 if not is_categorical_dtype(df[columns]):
--> 192 raise ValueError("'columns' must be category dtype")
193 if not has_known_categories(df[columns]):
194 raise ValueError("'columns' must have known categories. Please use "
ValueError: 'columns' must be category dtype
So I've tried to convert the column:
user_item.song_id = user_item.song_id.astype('category')
But I got this when calling pivot_table:
ValueError Traceback (most recent call last)
<ipython-input-78-a870ece1f3e8> in <module>
2 columns='song_id',
3 values='interacted',
----> 4 aggfunc='mean')
~/anaconda3/lib/python3.6/site-packages/dask/dataframe/core.py in pivot_table(self, index, columns, values, aggfunc)
3123 from .reshape import pivot_table
3124 return pivot_table(self, index=index, columns=columns, values=values,
-> 3125 aggfunc=aggfunc)
3126
3127 def to_records(self, index=False):
~/anaconda3/lib/python3.6/site-packages/dask/dataframe/reshape.py in pivot_table(df, index, columns, values, aggfunc)
192 raise ValueError("'columns' must be category dtype")
193 if not has_known_categories(df[columns]):
--> 194 raise ValueError("'columns' must have known categories. Please use "
195 "`df[columns].cat.as_known()` beforehand to ensure "
196 "known categories")
ValueError: 'columns' must have known categories. Please use `df[columns].cat.as_known()` beforehand to ensure known categories
Then I tried:
user_item.song_id = user_item.song_id.astype('category').cat.as_known()
And I immediately got:
KeyError Traceback (most recent call last)
<timed exec> in <module>
~/anaconda3/lib/python3.6/site-packages/dask/dataframe/categorical.py in as_known(self, **kwargs)
187 if self.known:
188 return self._series
--> 189 categories = self._property_map('categories').unique().compute(**kwargs)
190 return self.set_categories(categories.values)
191
~/anaconda3/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
154 dask.base.compute
155 """
--> 156 (result,) = compute(self, traverse=False, **kwargs)
157 return result
158
~/anaconda3/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
395 keys = [x.__dask_keys__() for x in collections]
396 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 397 results = schedule(dsk, keys, **kwargs)
398 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
399
~/anaconda3/lib/python3.6/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2336 try:
2337 results = self.gather(packed, asynchronous=asynchronous,
-> 2338 direct=direct)
2339 finally:
2340 for f in futures.values():
~/anaconda3/lib/python3.6/site-packages/distributed/client.py in gather(self, futures, errors, maxsize, direct, asynchronous)
1660 return self.sync(self._gather, futures, errors=errors,
1661 direct=direct, local_worker=local_worker,
-> 1662 asynchronous=asynchronous)
1663
1664 #gen.coroutine
~/anaconda3/lib/python3.6/site-packages/distributed/client.py in sync(self, func, *args, **kwargs)
674 return future
675 else:
--> 676 return sync(self.loop, func, *args, **kwargs)
677
678 def __repr__(self):
~/anaconda3/lib/python3.6/site-packages/distributed/utils.py in sync(loop, func, *args, **kwargs)
275 e.wait(10)
276 if error[0]:
--> 277 six.reraise(*error[0])
278 else:
279 return result[0]
~/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
685 raise value.with_traceback(tb)
--> 686 raise value
687
688 else:
~/anaconda3/lib/python3.6/site-packages/distributed/utils.py in f()
260 if timeout is not None:
261 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262 result[0] = yield future
263 except Exception as exc:
264 error[0] = sys.exc_info()
~/anaconda3/lib/python3.6/site-packages/tornado/gen.py in run(self)
1131
1132 try:
-> 1133 value = future.result()
1134 except Exception:
1135 self.had_exception = True
~/anaconda3/lib/python3.6/site-packages/tornado/gen.py in run(self)
1139 if exc_info is not None:
1140 try:
-> 1141 yielded = self.gen.throw(*exc_info)
1142 finally:
1143 # Break up a reference to itself
~/anaconda3/lib/python3.6/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1501 six.reraise(type(exception),
1502 exception,
-> 1503 traceback)
1504 if errors == 'skip':
1505 bad_keys.add(key)
~/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
683 value = tp()
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
687
/home/pi/env/lib/python3.5/site-packages/dask/dataframe/core.py in apply_and_enforce()
KeyError: '_func'
And the output of my workers is:
Exception: KeyError('_func',)
distributed.worker - WARNING - Compute Failed
Function: execute_task
args: ((<function apply at 0x764b3c90>, <function unique at 0x6ef24a50>, [(<function apply_and_enforce at 0x6eeede88>, <function Accessor._delegate_property at 0x6ef28198>, [(<function apply_and_enforce at 0x6eeede88>, <methodcaller: astype>, [(<built-in function getitem>, (<function apply at 0x764b3c90>, <function partial_by_order at 0x762ebd20>, [ msno ... interacted
0 vDi/nHqBu7wb+DtI2Ix4TupWQatUEFR41mDC0c8Voh8= ... 1
1 3IGfhB6dtaYxEGm20yFtRxN7KoFZjzGJbXPSjsjW5cM= ... 1
2 4QugsKXr1pJXSBj6CbSYCF6O7QY2/MHGICUU16p3fig= ... 1
3 i4g6DQpmkTuRCS6/osUsQ8GSBJM8261is4Q04NDGRPk= ... 1
4 TTNNMisplhps4y5gdQ6rsv0++TIKOOIIZLz05W97vFU= ... 1
5 sDR8kS+t73zE9QM8D03Zw3mVrsRXc0Nih/WRl02sfZI= ... 1
6 yiGYGWyGrCYHlMOtPv65urw9RfdH43PNGzu8TRaO+m8= ... 1
7 7lXXPZLRbAPWE5ILi2BFQVEhYzPz9cwNvuzIVCuHfZY= ... 1
8 4clHF4wjaFgY6+nQWoXm1EEAvB
kwargs: {}
Exception: KeyError('_func',)
If anyone Know how to fix this issue it would helps me a lot.
Solved by putting the same version of dask-distributed dask-core across all the workers, scheduler and client.

optunity package python optunity.maximize_structured issue

I am attaching the code . I am trying to replicate the code given in the link link. I am trying this code on my data. Below is the code and error that i am getting. Sorry for the bad formation of the question I tried to insert the code in a proper way but it doesnt seem to work for some commands.
search = {'algorithm': {'k-nn': {'n_neighbors': [1, 5]},
'SVM': {'kernel': {'linear': {'C': [0, 2]},
'rbf': {'gamma': [0, 1], 'C': [0, 10]},
'poly': {'degree': [2, 5], 'C': [0, 50], 'coef0': [0, 1]}
}
},
'naive-bayes': None,
'random-forest': {'n_estimators': [10, 30],
'max_features': [5, 20]}
}
}
#optunity.cross_validated(x=data, y=labels, num_folds=5)
def performance(x_train, y_train, x_test, y_test,
algorithm, n_neighbors=None, n_estimators=None, max_features=None,
kernel=None, C=None, gamma=None, degree=None, coef0=None):
# fit the model
if algorithm == 'k-nn':
model = KNeighborsClassifier(n_neighbors=int(n_neighbors))
model.fit(x_train, y_train)
elif algorithm == 'naive-bayes':
model = GaussianNB()
model.fit(x_train, y_train)
elif algorithm == 'random-forest':
model = RandomForestClassifier(n_estimators=int(n_estimators),
max_features=int(max_features))
model.fit(x_train, y_train)
# predict the test set
if algorithm == 'k-nn':
predictions = model.predict_proba(x_test)[:, 1]
else:
predictions = model.predict_proba(x_test)[:, 1]
return optunity.metrics.roc_auc(y_test, predictions, positive=True)
optimal_configuration, info, _ = optunity.maximize_structured(performance,
search_space=search,
num_evals=300)
This is the error message that I am getting
TypeError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in select(collection, indices)
76 try:
---> 77 return collection[indices, ...]
78 except IndexError: # caused by scipy.sparse in some versions
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1642 cache = self._item_cache
-> 1643 res = cache.get(item)
1644 if res is None:
TypeError: unhashable type: 'list'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 26770
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-153-3d5c9d0e2047> in <module>()
1 optimal_configuration, info, _ = optunity.maximize_structured(performance,
2 search_space=search,
----> 3 num_evals=300)
C:\ProgramData\Anaconda3\lib\site-packages\optunity\api.py in maximize_structured(f, search_space, num_evals, pmap)
368 solver = make_solver(**suggestion)
369 solution, details = optimize(solver, f, maximize=True, max_evals=num_evals,
--> 370 pmap=pmap, decoder=tree.decode)
371 return solution, details, suggestion
372
C:\ProgramData\Anaconda3\lib\site-packages\optunity\api.py in optimize(solver, func, maximize, max_evals, pmap, decoder)
243 time = timeit.default_timer()
244 try:
--> 245 solution, report = solver.optimize(f, maximize, pmap=pmap)
246 except fun.MaximumEvaluationsException:
247 # early stopping because maximum number of evaluations is reached
C:\ProgramData\Anaconda3\lib\site-packages\optunity\solvers\ParticleSwarm.py in optimize(self, f, maximize, pmap)
269 for g in range(self.num_generations):
270 fitnesses = pmap(evaluate, list(map(self.particle2dict, pop)))
--> 271 for part, fitness in zip(pop, fitnesses):
272 part.fitness = fit * util.score(fitness)
273 if not part.best or part.best_fitness < part.fitness:
C:\ProgramData\Anaconda3\lib\site-packages\optunity\solvers\ParticleSwarm.py in evaluate(d)
257 #functools.wraps(f)
258 def evaluate(d):
--> 259 return f(**d)
260
261 if maximize:
C:\ProgramData\Anaconda3\lib\site-packages\optunity\functions.py in wrapped_f(*args, **kwargs)
354 else:
355 wrapped_f.num_evals += 1
--> 356 return f(*args, **kwargs)
357 wrapped_f.num_evals = 0
358 return wrapped_f
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in wrapped_f(*args, **kwargs)
149 def wrapped_f(*args, **kwargs):
150 try:
--> 151 return f(*args, **kwargs)
152 except ConstraintViolation:
153 return default
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in wrapped_f(*args, **kwargs)
127 if violations:
128 raise ConstraintViolation(violations, *args, **kwargs)
--> 129 return f(*args, **kwargs)
130 wrapped_f.constraints = constraints
131 return wrapped_f
C:\ProgramData\Anaconda3\lib\site-packages\optunity\constraints.py in func(*args, **kwargs)
264 #functions.wraps(f)
265 def func(*args, **kwargs):
--> 266 return f(*args, **kwargs)
267 return func
268
C:\ProgramData\Anaconda3\lib\site-packages\optunity\search_spaces.py in wrapped(**kwargs)
324 def wrapped(**kwargs):
325 decoded = self.decode(kwargs)
--> 326 return f(**decoded)
327 return wrapped
328
C:\ProgramData\Anaconda3\lib\site-packages\optunity\functions.py in wrapped_f(*args, **kwargs)
299 value = wrapped_f.call_log.get(*args, **kwargs)
300 if value is None:
--> 301 value = f(*args, **kwargs)
302 wrapped_f.call_log.insert(value, *args, **kwargs)
303 return value
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in __call__(self, *args, **kwargs)
396 for i in range(self.num_folds)
397 if not i == fold]))
--> 398 kwargs['x_train'] = select(self.x, rows_train)
399 kwargs['x_test'] = select(self.x, rows_test)
400 if not self.y is None: # dealing with a supervised algorithm
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in select(collection, indices)
82 indexset = set(indices)
83 return collection.zipWithIndex().filter(lambda x: x[1] in indexset).map(lambda x: x[0])
---> 84 return [collection[i] for i in indices]
85
86
C:\ProgramData\Anaconda3\lib\site-packages\optunity\cross_validation.py in <listcomp>(.0)
82 indexset = set(indices)
83 return collection.zipWithIndex().filter(lambda x: x[1] in indexset).map(lambda x: x[0])
---> 84 return [collection[i] for i in indices]
85
86
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
1966 def _getitem_column(self, key):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
1973 # duplicate columns & possible reduce dimensionality
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 26770
So I got the answer.The format of input data set needed to be in array and the label as a boolean list.

Categories