KeyError: u'no item named 0' comes up with histogram - python

I've seen some similar questions on here, but none that seem to describe exactly the same problem as mine. I'm trying to create a histogram of chemical data. In other cases this error seemed to be related to a missing column, but my data doesn't (and shouldn't) have a column named "0". Here is my code and the error message:
%pylab inline
import matplotlib.pyplot as plt
import pandas as pd
plt.figure()
#importing the data
genesis = pd.read_csv(r'C:\Connors Temp\...... (878-15G)\Task_4 (Genesis)\genesis_MWMP.csv')
arsenic = genesis[['Code','Arsenic']]
antimony = genesis[['Code','Antimony']]
plt.hist(antimony)
KeyError Traceback (most recent call last)
<ipython-input-7-c537deba42d9> in <module>()
----> 1 plt.hist(antimony)
C:\Python27\lib\site-packages\matplotlib\pyplot.pyc in hist(x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, hold, **kwargs)
2655 histtype=histtype, align=align, orientation=orientation,
2656 rwidth=rwidth, log=log, color=color, label=label,
-> 2657 stacked=stacked, **kwargs)
2658 draw_if_interactive()
2659 finally:
C:\Python27\lib\site-packages\matplotlib\axes.pyc in hist(self, x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, **kwargs)
8010 # Massage 'x' for processing.
8011 # NOTE: Be sure any changes here is also done below to 'weights'
-> 8012 if isinstance(x, np.ndarray) or not iterable(x[0]):
8013 # TODO: support masked arrays;
8014 x = np.asarray(x)
C:\Python27\lib\site-packages\pandas\core\frame.pyc in __getitem__(self, key)
1805 raise ValueError('Cannot index using non-boolean DataFrame')
1806 else:
-> 1807 return self._get_item_cache(key)
1808
1809 def _getitem_array(self, key):
C:\Python27\lib\site-packages\pandas\core\generic.pyc in _get_item_cache(self, item)
529 return cache[item]
530 except Exception:
--> 531 values = self._data.get(item)
532 res = self._box_item_values(item, values)
533 cache[item] = res
C:\Python27\lib\site-packages\pandas\core\internals.pyc in get(self, item)
828
829 def get(self, item):
--> 830 _, block = self._find_block(item)
831 return block.get(item)
832
C:\Python27\lib\site-packages\pandas\core\internals.pyc in _find_block(self, item)
942
943 def _find_block(self, item):
--> 944 self._check_have(item)
945 for i, block in enumerate(self.blocks):
946 if item in block:
C:\Python27\lib\site-packages\pandas\core\internals.pyc in _check_have(self, item)
949 def _check_have(self, item):
950 if item not in self.items:
--> 951 raise KeyError('no item named %s' % com.pprint_thing(item))
952
953 def reindex_axis(self, new_axis, method=None, axis=0, copy=True):
KeyError: u'no item named 0'

If you're using the lower-level libraries (that is, not pandas's wrappers for them), you should probably use
hist(antimony.Antimony.values)
(see the hist documentation for more).

hist takes a 1-dimensional array. Does this work?
antimony.Antimony.hist()
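
Putting the two answers together, a minimal sketch (the CSV path is a placeholder; it assumes Antimony is a numeric column):

import matplotlib.pyplot as plt
import pandas as pd

genesis = pd.read_csv('genesis_MWMP.csv')  # placeholder path
antimony = genesis[['Code', 'Antimony']]

# plt.hist expects 1-D data; it probes x[0], and positional lookup on a
# two-column DataFrame is exactly what raises KeyError: 'no item named 0'
plt.hist(antimony['Antimony'].values)
plt.show()

# or let pandas draw it directly:
antimony['Antimony'].hist()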

Related

How to make use of the innvestigate package for LRP with tabular data?

I am trying to get LIME and LRP working on a simple DNN with tabular data for some general usability evaluations of the two approaches with non-tech-savvy users. I managed to get LIME running to get an insight into the predictions of my network. Yet, I am having trouble implementing LRP with innvestigate to get insights into those predictions.
So far I have created a relevance-based analyzer with:
import innvestigate

# create the data instance to analyze later on
data = (X_train, y_train, X_test, y_test)
instance_to_test = data[2][7:8]

# create the LRP analyzer
LRP_analyzer = innvestigate.analyzer.relevance_based.relevance_analyzer.LRPZ(model)

# analyze the specified instance
analysis = LRP_analyzer.analyze(instance_to_test)
Yet, after that I am stuck. What I want to do in the end is take my specified instance of the data and create a heatmap of the contributions to the classification result (meaning: relevance scores of the features for that instance). Has anybody worked with innvestigate (and this somewhat special use case, as it is mainly used for image data) and can help me with that? I know it should in theory be possible, as I have seen LRP used with tabular data in this paper. Besides my code above, I receive the following error message when trying to get a heatmap displayed (see the bottom of the post).
That should be because the general implementation requires an image tensor with 4 axes, which I can't provide. Any ideas for a workaround to make that happen for tabular data?
TypeError Traceback (most recent call last)
Input In [36], in <cell line: 1>()
----> 1 plt.imshow(analysis.squeeze(), cmap="seismic")
2 plt.show()
File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\matplotlib\_api\deprecation.py:459, in make_keyword_only.<locals>.wrapper(*args, **kwargs)
453 if len(args) > name_idx:
454 warn_deprecated(
455 since, message="Passing the %(name)s %(obj_type)s "
456 "positionally is deprecated since Matplotlib %(since)s; the "
457 "parameter will become keyword-only %(removal)s.",
458 name=name, obj_type=f"parameter of {func.__name__}()")
--> 459 return func(*args, **kwargs)
File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\matplotlib\pyplot.py:2650, in imshow(X, cmap, norm, aspect, interpolation, alpha, vmin, vmax, origin, extent, interpolation_stage, filternorm, filterrad, resample, url, data, **kwargs)
2644 #_copy_docstring_and_deprecators(Axes.imshow)
2645 def imshow(
2646 X, cmap=None, norm=None, aspect=None, interpolation=None,
2647 alpha=None, vmin=None, vmax=None, origin=None, extent=None, *,
2648 interpolation_stage=None, filternorm=True, filterrad=4.0,
2649 resample=None, url=None, data=None, **kwargs):
-> 2650 __ret = gca().imshow(
2651 X, cmap=cmap, norm=norm, aspect=aspect,
2652 interpolation=interpolation, alpha=alpha, vmin=vmin,
2653 vmax=vmax, origin=origin, extent=extent,
2654 interpolation_stage=interpolation_stage,
2655 filternorm=filternorm, filterrad=filterrad, resample=resample,
2656 url=url, **({"data": data} if data is not None else {}),
2657 **kwargs)
2658 sci(__ret)
2659 return __ret
File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\matplotlib\_api\deprecation.py:459, in make_keyword_only.<locals>.wrapper(*args, **kwargs)
453 if len(args) > name_idx:
454 warn_deprecated(
455 since, message="Passing the %(name)s %(obj_type)s "
456 "positionally is deprecated since Matplotlib %(since)s; the "
457 "parameter will become keyword-only %(removal)s.",
458 name=name, obj_type=f"parameter of {func.__name__}()")
--> 459 return func(*args, **kwargs)
File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\matplotlib\__init__.py:1414, in _preprocess_data.<locals>.inner(ax, data, *args, **kwargs)
1411 #functools.wraps(func)
1412 def inner(ax, *args, data=None, **kwargs):
1413 if data is None:
-> 1414 return func(ax, *map(sanitize_sequence, args), **kwargs)
1416 bound = new_sig.bind(ax, *args, **kwargs)
1417 auto_label = (bound.arguments.get(label_namer)
1418 or bound.kwargs.get(label_namer))
File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\matplotlib\axes\_axes.py:5487, in Axes.imshow(self, X, cmap, norm, aspect, interpolation, alpha, vmin, vmax, origin, extent, interpolation_stage, filternorm, filterrad, resample, url, **kwargs)
5480 self.set_aspect(aspect)
5481 im = mimage.AxesImage(self, cmap, norm, interpolation,
5482 origin, extent, filternorm=filternorm,
5483 filterrad=filterrad, resample=resample,
5484 interpolation_stage=interpolation_stage,
5485 **kwargs)
-> 5487 im.set_data(X)
5488 im.set_alpha(alpha)
5489 if im.get_clip_path() is None:
5490 # image does not already have clipping set, clip to axes patch
File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\matplotlib\image.py:715, in _ImageBase.set_data(self, A)
711 self._A = self._A[:, :, 0]
713 if not (self._A.ndim == 2
714 or self._A.ndim == 3 and self._A.shape[-1] in [3, 4]):
--> 715 raise TypeError("Invalid shape {} for image data"
716 .format(self._A.shape))
718 if self._A.ndim == 3:
719 # If the input data has values outside the valid range (after
720 # normalisation), we issue a warning and then clip X to the bounds
721 # - otherwise casting wraps extreme values, hiding outliers and
722 # making reliable interpretation impossible.
723 high = 255 if np.issubdtype(self._A.dtype, np.integer) else 1
TypeError: Invalid shape (10,) for image data
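
A possible workaround (a sketch of mine, not from the post): imshow only accepts 2-D (or RGB/RGBA 3-D) arrays, so the 1-D relevance vector of shape (10,) can be reshaped into a one-row image, or, often clearer for tabular features, drawn as a bar chart:

import matplotlib.pyplot as plt

relevance = analysis.squeeze()  # shape (10,): one relevance score per feature

# option 1: render the vector as a single-row heatmap
plt.imshow(relevance.reshape(1, -1), cmap="seismic", aspect="auto")
plt.colorbar()
plt.show()

# option 2: one bar per feature
plt.bar(range(relevance.size), relevance)
plt.show()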

Block third-party module tracebacks in IPython

Currently, if I run some code that raises an error in IPython with pandas, for example a script named temp.py
import pandas as pd
temp = pd.Series(range(5))
print(temp.loc[6])
and then run the command %run temp.py in the IPython console, I get error output like
KeyError Traceback (most recent call last)
D:\codes\python\test.py in <module>
2
3 temp = pd.Series(range(5))
----> 4 print(temp.loc[6])
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
929
930 maybe_callable = com.apply_if_callable(key, self.obj)
--> 931 return self._getitem_axis(maybe_callable, axis=axis)
932
933 def _is_scalar_access(self, key: tuple):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1162 # fall thru to straight lookup
1163 self._validate_key(key, axis)
-> 1164 return self._get_label(key, axis=axis)
1165
1166 def _get_slice_axis(self, slice_obj: slice, axis: int):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
1111 def _get_label(self, label, axis: int):
1112 # GH#5667 this will fail if the label is not present in the axis.
-> 1113 return self.obj.xs(label, axis=axis)
1114
1115 def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3774 raise TypeError(f"Expected label or tuple of labels, got {key}") from e
3775 else:
-> 3776 loc = index.get_loc(key)
3777
3778 if isinstance(loc, np.ndarray):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\range.py in get_loc(self, key, method, tolerance)
385 return self._range.index(new_key)
386 except ValueError as err:
--> 387 raise KeyError(key) from err
388 raise KeyError(key)
389 return super().get_loc(key, method=method, tolerance=tolerance)
KeyError: 6
The traceback here is long and mostly useless: to find the failing line in my own code, I have to scroll up past all the pandas frames.
Also, when I then run the %debug command, I land in the pandas function and its local variables instead of in my own code.
So, is there a way to make IPython show error info and debug only within my own code?
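
Not from the original thread, but IPython's %xmode magic addresses at least the traceback half of this: it controls traceback verbosity, and Minimal mode prints only the final exception line. A quick sketch:

# in the IPython console
%xmode Minimal     # show only the exception type and message
%run temp.py       # should now print just: KeyError: 6
%xmode Context     # restore the default traceback style

For the %debug half, the pdb commands u (up) and d (down) at least let you walk from the pandas frame back up to your own code.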

Error when not passing any kwargs to a wrapped function

I'm passing kwargs through to a wrapped function. When I call the function with some kwargs it works fine, but when I don't pass any kwargs as parameters, I receive the error
<__array_function__ internals> in concatenate(*args, **kwargs).
Should kwargs be optional?
My functions look as follows:
import pandas as pd
import matplotlib.pyplot as plt
from typing import Callable, Union

def group_data(df: pd.DataFrame, group: Union[list, str], cols: Union[list, str],
               func: Union[list, Callable]) -> pd.DataFrame:
    df = df.groupby(group)[cols].agg(func)
    if callable(func):
        df.rename(f'{func.__name__}', inplace=True)
    else:
        df.columns = [x.__name__ for x in func]
    return df

def print_subplots(df: pd.DataFrame, x: str, y: Union[list, str, Callable], nrows: int,
                   ncols: int, func: Callable,  # order: Union[list, str] = None,
                   gr_by: Union[list, str] = None, title: str = None,
                   figsize: tuple = (15, 6), **kwargs):
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize)
    fig.suptitle(title)
    for i, element in enumerate(y):
        if type(element) == list:
            df_grouped = feature.group_data(df, gr_by, element[0], element[1])
            func(
                ax=axes[i],
                data=df_grouped.reset_index(),
                x=gr_by[0],
                y=element[1].__name__,
                **kwargs
            )
        else:
            func(ax=axes[i], data=df, x=x, y=element, **kwargs)
When I call:
print_subplots(
    pricing,
    'search_weekday',
    ['z_score', ['z_score', np.median]],
    nrows=1,
    ncols=2,
    func=sns.boxplot,
    gr_by=['search_weekday', 'checkin'],
    figsize=(20, 6),
    order=order
)
...the function works fine; in this case order is a kwarg.
But when I call:
print_subplots(
    pricing,
    'search_weekday',
    ['z_score', ['z_score', np.median]],
    nrows=1,
    ncols=2,
    func=sns.lineplot,
    gr_by=['search_weekday', 'checkin'],
    figsize=(20, 6)
)
...the call hangs in a loop, and when I interrupt the process I get this error:
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
/tmp/ipykernel_192586/2963703819.py in <module>
----> 1 plots.print_subplots(
2 pricing,
3 'search_weekday',
4 ['z_score', ['z_score', np.median]],
5 nrows=1,
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/src/visualization/plots.py in print_subplots(df, x, y, nrows, ncols, func, gr_by, title, figsize, **kwargs)
75 )
76 else:
---> 77 func(ax=axes[i], data=df, x=x, y=element, **kwargs)
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/relational.py in lineplot(x, y, hue, size, style, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, units, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend, ax, **kwargs)
708 p._attach(ax)
709
--> 710 p.plot(ax, kwargs)
711 return ax
712
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/relational.py in plot(self, ax, kws)
469 # Loop over the semantic subsets and add to the plot
470 grouping_vars = "hue", "size", "style"
--> 471 for sub_vars, sub_data in self.iter_data(grouping_vars, from_comp_data=True):
472
473 if self.sort:
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/_core.py in iter_data(self, grouping_vars, reverse, from_comp_data)
981
982 if from_comp_data:
--> 983 data = self.comp_data
984 else:
985 data = self.plot_data
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/_core.py in comp_data(self)
1055 orig = self.plot_data[var].dropna()
1056 comp_col = pd.Series(index=orig.index, dtype=float, name=var)
-> 1057 comp_col.loc[orig.index] = pd.to_numeric(axis.convert_units(orig))
1058
1059 if axis.get_scale() == "log":
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in __setitem__(self, key, value)
717 else:
718 key = com.apply_if_callable(key, self.obj)
--> 719 indexer = self._get_setitem_indexer(key)
720 self._has_valid_setitem_indexer(key)
721
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in _get_setitem_indexer(self, key)
664
665 try:
--> 666 return self._convert_to_indexer(key, axis=0, is_setter=True)
667 except TypeError as e:
668
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, key, axis, is_setter)
1255 return inds
1256 else:
-> 1257 return self._get_listlike_indexer(key, axis)[1]
1258 else:
1259 try:
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis)
1310 keyarr = ax.reindex(keyarr)[0]
1311 else:
-> 1312 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
1313
1314 self._validate_read_indexer(keyarr, indexer, axis)
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexes/base.py in _reindex_non_unique(self, target)
3865 return self[:0], np.array([], dtype=np.intp), None
3866
-> 3867 indexer, missing = self.get_indexer_non_unique(target)
3868 check = indexer != -1
3869 new_labels = self.take(indexer[check])
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_indexer_non_unique(self, target)
5256 tgt_values = target._get_engine_target()
5257
-> 5258 indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
5259 return ensure_platform_int(indexer), ensure_platform_int(missing)
5260
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_indexer_non_unique()
<__array_function__ internals> in resize(*args, **kwargs)
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/numpy/core/fromnumeric.py in resize(a, new_shape)
1415 extra = Na - extra
1416
-> 1417 a = concatenate((a,) * n_copies)
1418 if extra > 0:
1419 a = a[:-extra]
<__array_function__ internals> in concatenate(*args, **kwargs)
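
As to "Should kwargs be optional?": yes. A **kwargs parameter collects zero or more keyword arguments, so omitting them is legal by itself, as this minimal sketch shows:

def wrapped(**kwargs):
    # with no keyword arguments, kwargs is simply an empty dict
    return kwargs

print(wrapped())           # {}
print(wrapped(order=[1]))  # {'order': [1]}

So the hang is unlikely to come from the missing kwargs as such. The traceback bottoms out in pandas' non-unique-index machinery (get_indexer_non_unique), so a duplicated index in the data handed to sns.lineplot is a plausible suspect, though that is a guess from the trace rather than a confirmed diagnosis.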

Making choropleth map from dataframe using folium

I'm trying to make a choropleth map using pandas to read CSV data. I followed a tutorial online, but couldn't get my code to work.
Tutorial link: https://towardsdatascience.com/choropleth-maps-with-folium-1a5b8bcdd392. Here is the code I'm trying to get to work:
# CHOROPLETH MAP
import json
import folium
import pandas as pd

kunnat_geo = r'kunnat.geojson'
coords = pd.read_csv('taulu2.csv')
map_cholo = folium.Map(location=[65, 26], zoom_start=4, tiles='stamenwatercolor')
map_cholo.choropleth(
    geo_data=kunnat_geo,
    data=coords,
    columns=['ALUE', 'MIELENTERVEYDEN KUNTOUTUSKOTIEN ASIAKKAAT VUONNA 2018'],
    key_on='features.properties.Kunta',
    fill_color='YlGnBu',
    fill_opacity=1,
    line_opacity=1,
    legend_name='MIELENTERVEYDEN KUNTOUTUSKOTIEN ASIAKKAAT VUONNA 2018',
    smooth_factor=0)
map_cholo
However, it gives me an AttributeError:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-74-026c8c7bfd66> in <module>
14 line_opacity=1,
15 legend_name='MIELENTERVEYDEN KUNTOUTUSKOTIEN ASIAKKAAT VUONNA 2018',
---> 16 smooth_factor=0)
17
18 map_cholo
D:\Anaconda3\lib\site-packages\folium\folium.py in choropleth(self, *args, **kwargs)
416 )
417 from folium.features import Choropleth
--> 418 self.add_child(Choropleth(*args, **kwargs))
419
420 def keep_in_front(self, *args):
D:\Anaconda3\lib\site-packages\folium\features.py in __init__(self, geo_data, data, columns, key_on, bins, fill_color, nan_fill_color, fill_opacity, nan_fill_opacity, line_color, line_weight, line_opacity, name, legend_name, overlay, control, show, topojson, smooth_factor, highlight, **kwargs)
1249 style_function=style_function,
1250 smooth_factor=smooth_factor,
-> 1251 highlight_function=highlight_function if highlight else None)
1252
1253 self.add_child(self.geojson)
D:\Anaconda3\lib\site-packages\folium\features.py in __init__(self, data, style_function, highlight_function, name, overlay, control, show, smooth_factor, tooltip, embed, popup)
456 self.convert_to_feature_collection()
457 if self.style:
--> 458 self._validate_function(style_function, 'style_function')
459 self.style_function = style_function
460 self.style_map = {}
D:\Anaconda3\lib\site-packages\folium\features.py in _validate_function(self, func, name)
521 """
522 test_feature = self.data['features'][0]
--> 523 if not callable(func) or not isinstance(func(test_feature), dict):
524 raise ValueError('{} should be a function that accepts items from '
525 'data[\'features\'] and returns a dictionary.'
D:\Anaconda3\lib\site-packages\folium\features.py in style_function(x)
1223
1224 def style_function(x):
-> 1225 color, opacity = color_scale_fun(x)
1226 return {
1227 'weight': line_weight,
D:\Anaconda3\lib\site-packages\folium\features.py in color_scale_fun(x)
1204
1205 def color_scale_fun(x):
-> 1206 key_of_x = get_by_key(x, key_on)
1207 if key_of_x is None:
1208 raise ValueError("key_on `{!r}` not found in GeoJSON.".format(key_on))
D:\Anaconda3\lib\site-packages\folium\features.py in get_by_key(obj, key)
1201 return (obj.get(key, None) if len(key.split('.')) <= 1 else
1202 get_by_key(obj.get(key.split('.')[0], None),
-> 1203 '.'.join(key.split('.')[1:])))
1204
1205 def color_scale_fun(x):
D:\Anaconda3\lib\site-packages\folium\features.py in get_by_key(obj, key)
1200 def get_by_key(obj, key):
1201 return (obj.get(key, None) if len(key.split('.')) <= 1 else
-> 1202 get_by_key(obj.get(key.split('.')[0], None),
1203 '.'.join(key.split('.')[1:])))
1204
AttributeError: 'NoneType' object has no attribute 'get'
Here is the geojson file im using: https://raw.githubusercontent.com/varmais/maakunnat/master/kunnat.geojson
And the data: https://pastebin.com/SdEXDM89
The GeoJSON you are using contains multiple flaws. Just as an example, look at
{"id":49,"type":"Feature","geometry":{"coordinates":[[[[25.134789950981,65.0613390595066],[24.9509796014637,65.1411214235741],[24.1149334366251,65.1739425977789],[24.1374948169974,65.2416484128856],[24.6071512739822,65.211492673253],[25.2249108920834,65.2366422852267],[25.319454962569,65.2626286706296],[25.5524286749097,65.2529182323504],[25.6623616596278,65.260748036272],[25.7723806903971,65.2919894627541],[25.9132907485861,65.2931966157892],[26.0266782370841,65.2703735338041],[25.993210256336,65.1914324603082],[25.736352838218,65.2201254797071],[25.5637124399186,65.1919505955019],[25.635197316715,65.1390540421563],[25.6086182968484,65.0816350751541],[25.3775958184099,65.1070832092577],[25.2892903033333,65.0817828240486],[25.134789950981,65.0613390595066]]]],"type":"MultiPolygon"},"properties":{"nationalCode":84,"Country":"FI","name_fi":"Haukipudas","name_se":"Haukipudas"}}
It does not have Kunta in properties, which is why the key_on lookup returns None and the .get call fails. I suggest you replace that GeoJSON, or edit it as a very last resort.
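
If replacing the file is not an option, here is a rough sketch of a workaround. It assumes (my assumption, not verified against the data) that the values in the ALUE column match the name_fi property that does exist in this GeoJSON; note also that folium expects key_on to start with the singular 'feature', and that newer folium versions prefer folium.Choropleth over the deprecated Map.choropleth:

import folium
import pandas as pd

coords = pd.read_csv('taulu2.csv')
m = folium.Map(location=[65, 26], zoom_start=4)

folium.Choropleth(
    geo_data='kunnat.geojson',
    data=coords,
    columns=['ALUE', 'MIELENTERVEYDEN KUNTOUTUSKOTIEN ASIAKKAAT VUONNA 2018'],
    key_on='feature.properties.name_fi',  # singular 'feature', and a property that exists
    fill_color='YlGnBu',
    fill_opacity=1,
    line_opacity=1,
    legend_name='MIELENTERVEYDEN KUNTOUTUSKOTIEN ASIAKKAAT VUONNA 2018',
    smooth_factor=0,
).add_to(m)
m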

Memory Error when trying to save pandas DataFrame to disk using to_hdf()

I am trying to save a relatively big DataFrame (the info() method reports 663+ MB of memory usage) to an HDFStore using the to_hdf() method.
But every time, I run into a MemoryError.
Hence, I have two questions regarding this:
Is the memory error primarily because I don't have enough RAM (I have 16 GB)?
How can I save it, given my RAM restriction? Is there a way to append it to the HDFStore in chunks?
I know there is a possibility to save it as a 'table' rather than 'fixed', and that allows appending, but I have not tried that yet, mostly because I was hoping for a simpler alternative.
Thanks a lot in advance :)
P.S. I would like to add that I tried to_pickle() and that worked smoothly, so my assumption is that it can't have been a physical RAM problem.
Error:
MemoryError Traceback (most recent call last)
<ipython-input-10-05bb5886160a> in <module>()
1 train_data = pd.HDFStore('strat_train_data.h5')
----> 2 strat_train_set.to_hdf(train_data, 'strat_train_set')
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\generic.py in to_hdf(self, path_or_buf, key, **kwargs)
1280
1281 from pandas.io import pytables
-> 1282 return pytables.to_hdf(path_or_buf, key, self, **kwargs)
1283
1284 def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in to_hdf(path_or_buf, key, value, mode, complevel, complib, append, **kwargs)
268 f(store)
269 else:
--> 270 f(path_or_buf)
271
272
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in <lambda>(store)
260 f = lambda store: store.append(key, value, **kwargs)
261 else:
--> 262 f = lambda store: store.put(key, value, **kwargs)
263
264 path_or_buf = _stringify_path(path_or_buf)
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in put(self, key, value, format, append, **kwargs)
869 format = get_option("io.hdf.default_format") or 'fixed'
870 kwargs = self._validate_format(format, kwargs)
--> 871 self._write_to_group(key, value, append=append, **kwargs)
872
873 def remove(self, key, where=None, start=None, stop=None):
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
1311
1312 # write the object
-> 1313 s.write(obj=value, append=append, complib=complib, **kwargs)
1314
1315 if s.is_table and index:
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in write(self, obj, **kwargs)
2890 # I have no idea why, but writing values before items fixed #2299
2891 blk_items = data.items.take(blk.mgr_locs)
-> 2892 self.write_array('block%d_values' % i, blk.values, items=blk_items)
2893 self.write_index('block%d_items' % i, blk_items)
2894
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in write_array(self, key, value, items)
2658 vlarr = self._handle.create_vlarray(self.group, key,
2659 _tables().ObjectAtom())
-> 2660 vlarr.append(value)
2661 else:
2662 if empty_array:
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\tables\vlarray.py in append(self, sequence)
517 atom = self.atom
518 if not hasattr(atom, 'size'): # it is a pseudo-atom
--> 519 sequence = atom.toarray(sequence)
520 statom = atom.base
521 else:
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\tables\atom.py in toarray(self, object_)
1051
1052 def toarray(self, object_):
-> 1053 buffer_ = self._tobuffer(object_)
1054 array = numpy.ndarray(buffer=buffer_, dtype=self.base.dtype,
1055 shape=len(buffer_))
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\tables\atom.py in _tobuffer(self, object_)
1171
1172 def _tobuffer(self, object_):
-> 1173 return pickle.dumps(object_, pickle.HIGHEST_PROTOCOL)
1174
1175 def fromarray(self, array):
MemoryError:
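
The traceback offers a hint here: with the default format='fixed', object-dtype columns are pickled in one shot (the pickle.dumps call inside ObjectAtom at the bottom of the trace), which is where the memory spikes. A minimal sketch of the chunked 'table' approach the question mentions (the 100000-row chunk size is an arbitrary choice, and long string columns may need min_itemsize tuning):

import pandas as pd

chunk = 100000  # rows per append; tune to available RAM
with pd.HDFStore('strat_train_data.h5') as store:
    for start in range(0, len(strat_train_set), chunk):
        store.append('strat_train_set',
                     strat_train_set.iloc[start:start + chunk],
                     format='table')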
