Why do I get a KeyError while plotting a pandas data frame with matplotlib? [duplicate] - python

This question already has answers here:
KeyError when plotting a sliced pandas dataframe with datetimes
(3 answers)
Closed 7 years ago.
I have this data frame:
date_obj col1 col2 col3 col4
40038 2012-11-19 1.000 0.831856 0.986209 0.843919
40039 2012-11-20 2.015 0.521764 1.177320 0.938245
40040 2012-11-21 1.160 1.645345 1.964620 4.536440
40041 2012-11-22 3.171 2.444018 2.931550 3.737840
40042 2012-11-23 4.563 3.208111 3.587250 2.434040
40043 2012-11-24 5.379 3.863732 3.824540 1.634780
40044 2012-11-26 1.125 20.756739 4.162820 23.552100
40045 2012-11-27 3.340 5.369354 4.535090 1.129290
40046 2012-11-28 5.463 12.185730 8.102790 1.224300
40047 2012-11-29 6.596 14.328685 9.271000 24.655600
40048 2012-11-30 31.544 13.513497 12.103400 21.273500
40049 2012-12-01 24.921 26.144050 16.256200 13.883100
40050 2012-12-03 5.488 2.581351 7.220790 3.349450
40051 2012-12-04 6.977 5.893819 5.548870 2.948770
40052 2012-12-05 7.115 6.533022 5.863820 2.517030
40053 2012-12-06 5.842 8.754232 7.518660 1.447940
40054 2012-12-07 6.346 12.018631 10.263100 11.837400
40055 2012-12-08 17.666 4.548846 10.610400 11.110800
40056 2012-12-10 4.300 2.823566 1.475000 1.989210
40057 2012-12-11 2.415 2.436319 2.677440 2.908270
40058 2012-12-12 2.319 2.121092 3.455550 3.890480
40059 2012-12-13 1.000 1.633918 3.858540 4.316940
40060 2012-12-14 2.238 1.688475 5.065990 5.267850
40061 2012-12-15 1.798 2.621267 7.175370 6.957340
I try to plot it in the following way:
plt.figure(figsize=(17, 10))
plt.setp(plt.xticks()[1], rotation=45)
plt.plot_date(df_cut['date_obj'],df_cut['col1'], color='black', linestyle='-', markersize=3, linewidth=2)
plt.plot_date(df_cut['date_obj'],df_cut['col2'], color='red', linestyle='-', markersize=3)
plt.plot_date(df_cut['date_obj'],df_cut['col3'], color='green', linestyle='-', markersize=3)
plt.plot_date(df_cut['date_obj'],df_cut['col4'], color='blue', linestyle='-', markersize=3)
As a result I get an error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-544-1b8650d1e7e7> in <module>()
/ipython/local/lib/python2.7/site-packages/matplotlib/pyplot.pyc in plot_date(x, y, fmt, tz, xdate, ydate, hold, **kwargs)
2850 try:
2851 ret = ax.plot_date(x, y, fmt=fmt, tz=tz, xdate=xdate, ydate=ydate,
-> 2852 **kwargs)
2853 draw_if_interactive()
2854 finally:
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in plot_date(self, x, y, fmt, tz, xdate, ydate, **kwargs)
4061 if not self._hold: self.cla()
4062
-> 4063 ret = self.plot(x, y, fmt, **kwargs)
4064
4065 if xdate:
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in plot(self, *args, **kwargs)
3994 lines = []
3995
-> 3996 for line in self._get_lines(*args, **kwargs):
3997 self.add_line(line)
3998 lines.append(line)
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _grab_next_args(self, *args, **kwargs)
328 return
329 if len(remaining) <= 3:
--> 330 for seg in self._plot_args(remaining, kwargs):
331 yield seg
332 return
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _plot_args(self, tup, kwargs)
306 x = np.arange(y.shape[0], dtype=float)
307
--> 308 x, y = self._xy_from_xy(x, y)
309
310 if self.command == 'plot':
python/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _xy_from_xy(self, x, y)
222 def _xy_from_xy(self, x, y):
223 if self.axes.xaxis is not None and self.axes.yaxis is not None:
--> 224 bx = self.axes.xaxis.update_units(x)
225 by = self.axes.yaxis.update_units(y)
226
ipython/local/lib/python2.7/site-packages/matplotlib/axis.pyc in update_units(self, data)
1299 neednew = self.converter != converter
1300 self.converter = converter
-> 1301 default = self.converter.default_units(data, self)
1302 #print 'update units: default=%s, units=%s'%(default, self.units)
1303 if default is not None and self.units is None:
ipython/local/lib/python2.7/site-packages/matplotlib/dates.pyc in default_units(x, axis)
1156 'Return the tzinfo instance of *x* or of its first element, or None'
1157 try:
-> 1158 x = x[0]
1159 except (TypeError, IndexError):
1160 pass
ipython/local/lib/python2.7/site-packages/pandas/core/series.pyc in __getitem__(self, key)
611 def __getitem__(self, key):
612 try:
--> 613 return self.index.get_value(self, key)
614 except InvalidIndexError:
615 pass
ipython/local/lib/python2.7/site-packages/pandas/core/index.pyc in get_value(self, series, key)
761 """
762 try:
--> 763 return self._engine.get_value(series, key)
764 except KeyError, e1:
765 if len(self) > 0 and self.inferred_type == 'integer':
Strangely, this code works for some data frames but not for others. The data frames do not differ in structure; the only difference between them is the values they contain.
Could anybody please help me resolve this problem?

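A DataFrame stores dates as numpy.datetime64 objects, not Python datetime objects.
Furthermore, matplotlib's plot_date uses its own numeric representation of dates. The KeyError itself arises because matplotlib evaluates x[0] on the pandas Series it is given (see dates.py in the traceback); on a Series with an integer index this is a label lookup, and the sliced frame's index starts at 40038, so there is no label 0. Passing plain arrays (.values) avoids that lookup.
You could draw your data this way: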
plt.plot_date(matplotlib.dates.date2num(pandas.to_datetime(df_cut['date_obj'].values)),df_cut['col1'].values, color='black', linestyle='-', markersize=3, linewidth=2)
Or you could define column 'date_obj' as the index of your data:
df0 = pd.DataFrame.from_records(YourDataSource, columns=['date_obj','col1','col2','col3','col4'],index='date_obj')
And then simply use the pandas plot() method:
df0['col1'].plot()

Related

Error when not passing any kwargs to a wrapped function

I'm passing kwargs to a wrapped function. When I call the function with some kwargs it works fine, but when I don't pass any kwargs I receive the error
<__array_function__ internals> in concatenate(*args, **kwargs).
Should kwargs be optional?
My functions look as follows:
def group_data(df:pd.DataFrame, group:Union[list, str], cols:Union[list, str], func:Union[list, Callable]) -> DataFrame:
df = df.groupby(group)[cols].agg(func)
if callable(func):
df.rename(f'{func.__name__}', inplace=True)
else:
df.columns = [x.__name__ for x in func]
return df
def print_subplots(df:pd.DataFrame, x:str, y:Union[list, str, Callable], nrows:int,
ncols:int, func:Callable, #order:Union[list, str] = None,
gr_by:Union[list, str] = None, title:str = None, figsize:tuple=(15,6)
, **kwargs):
fig, axes = plt.subplots(nrows, ncols, figsize=figsize)
fig.suptitle(title)
for i, element in enumerate(y):
if type(element) == list:
df_grouped = feature.group_data(df, gr_by, element[0], element[1])
func(
ax=axes[i],
data=df_grouped.reset_index(),
x=gr_by[0],
y=element[1].__name__,
**kwargs
)
else:
func(ax=axes[i], data=df, x=x, y=element, **kwargs)
When I call :
print_subplots(
pricing,
'search_weekday',
['z_score', ['z_score', np.median]],
nrows=1,
ncols=2,
func=sns.boxplot,
gr_by=['search_weekday', 'checkin'],
figsize=(20,6),
order=order
)
...the function works fine, in this case order is a kwarg.
But when I call:
print_subplots(
pricing,
'search_weekday',
['z_score', ['z_score', np.median]],
nrows=1,
ncols=2,
func=sns.lineplot,
gr_by=['search_weekday', 'checkin'],
figsize=(20,6)
)
...the call appears to loop forever, and when I interrupt the process I get the error:
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
/tmp/ipykernel_192586/2963703819.py in <module>
----> 1 plots.print_subplots(
2 pricing,
3 'search_weekday',
4 ['z_score', ['z_score', np.median]],
5 nrows=1,
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/src/visualization/plots.py in print_subplots(df, x, y, nrows, ncols, func, gr_by, title, figsize, **kwargs)
75 )
76 else:
---> 77 func(ax=axes[i], data=df, x=x, y=element, **kwargs)
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/relational.py in lineplot(x, y, hue, size, style, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, units, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend, ax, **kwargs)
708 p._attach(ax)
709
--> 710 p.plot(ax, kwargs)
711 return ax
712
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/relational.py in plot(self, ax, kws)
469 # Loop over the semantic subsets and add to the plot
470 grouping_vars = "hue", "size", "style"
--> 471 for sub_vars, sub_data in self.iter_data(grouping_vars, from_comp_data=True):
472
473 if self.sort:
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/_core.py in iter_data(self, grouping_vars, reverse, from_comp_data)
981
982 if from_comp_data:
--> 983 data = self.comp_data
984 else:
985 data = self.plot_data
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/_core.py in comp_data(self)
1055 orig = self.plot_data[var].dropna()
1056 comp_col = pd.Series(index=orig.index, dtype=float, name=var)
-> 1057 comp_col.loc[orig.index] = pd.to_numeric(axis.convert_units(orig))
1058
1059 if axis.get_scale() == "log":
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in __setitem__(self, key, value)
717 else:
718 key = com.apply_if_callable(key, self.obj)
--> 719 indexer = self._get_setitem_indexer(key)
720 self._has_valid_setitem_indexer(key)
721
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in _get_setitem_indexer(self, key)
664
665 try:
--> 666 return self._convert_to_indexer(key, axis=0, is_setter=True)
667 except TypeError as e:
668
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, key, axis, is_setter)
1255 return inds
1256 else:
-> 1257 return self._get_listlike_indexer(key, axis)[1]
1258 else:
1259 try:
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis)
1310 keyarr = ax.reindex(keyarr)[0]
1311 else:
-> 1312 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
1313
1314 self._validate_read_indexer(keyarr, indexer, axis)
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexes/base.py in _reindex_non_unique(self, target)
3865 return self[:0], np.array([], dtype=np.intp), None
3866
-> 3867 indexer, missing = self.get_indexer_non_unique(target)
3868 check = indexer != -1
3869 new_labels = self.take(indexer[check])
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_indexer_non_unique(self, target)
5256 tgt_values = target._get_engine_target()
5257
-> 5258 indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
5259 return ensure_platform_int(indexer), ensure_platform_int(missing)
5260
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_indexer_non_unique()
<__array_function__ internals> in resize(*args, **kwargs)
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/numpy/core/fromnumeric.py in resize(a, new_shape)
1415 extra = Na - extra
1416
-> 1417 a = concatenate((a,) * n_copies)
1418 if extra > 0:
1419 a = a[:-extra]
<__array_function__ internals> in concatenate(*args, **kwargs)

Memory error while plotting dataframe (matplotlib)

I'm using pandas in a Jupyter Notebook and trying to plot a small DataFrame.
When I run the following line:
df9.plot(x='Time', y='Pressure mean')
I'm getting the following error:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-8-c789b8162a1a> in <module>()
----> 1 df9.plot(x='Time', y='Pressure mean')
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
3735 fontsize=fontsize, colormap=colormap, table=table,
3736 yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 3737 sort_columns=sort_columns, **kwds)
3738 __call__.__doc__ = plot_frame.__doc__
3739
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2609 yerr=yerr, xerr=xerr,
2610 secondary_y=secondary_y, sort_columns=sort_columns,
-> 2611 **kwds)
2612
2613
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
2436 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
2437
-> 2438 plot_obj.generate()
2439 plot_obj.draw()
2440 return plot_obj.result
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in generate(self)
1029
1030 for ax in self.axes:
-> 1031 self._post_plot_logic_common(ax, self.data)
1032 self._post_plot_logic(ax, self.data)
1033
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _post_plot_logic_common(self, ax, data)
1157 ax.set_xticklabels(xticklabels)
1158 self._apply_axis_properties(ax.xaxis, rot=self.rot,
-> 1159 fontsize=self.fontsize)
1160 self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize)
1161 elif self.orientation == 'horizontal':
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _apply_axis_properties(self, axis, rot, fontsize)
1205
1206 def _apply_axis_properties(self, axis, rot=None, fontsize=None):
-> 1207 labels = axis.get_majorticklabels() + axis.get_minorticklabels()
1208 for label in labels:
1209 if rot is not None:
C:\Anaconda3\lib\site-packages\matplotlib\axis.py in get_majorticklabels(self)
1159 def get_majorticklabels(self):
1160 'Return a list of Text instances for the major ticklabels'
-> 1161 ticks = self.get_major_ticks()
1162 labels1 = [tick.label1 for tick in ticks if tick.label1On]
1163 labels2 = [tick.label2 for tick in ticks if tick.label2On]
C:\Anaconda3\lib\site-packages\matplotlib\axis.py in get_major_ticks(self, numticks)
1288 'get the tick instances; grow as necessary'
1289 if numticks is None:
-> 1290 numticks = len(self.get_major_locator()())
1291 if len(self.majorTicks) < numticks:
1292 # update the new tick label properties from the old
C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in __call__(self)
876 vmin, vmax = vmax, vmin
877 if self.isdynamic:
--> 878 locs = self._get_default_locs(vmin, vmax)
879 else: # pragma: no cover
880 base = self.base
C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in _get_default_locs(self, vmin, vmax)
857
858 if self.plot_obj.date_axis_info is None:
--> 859 self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq)
860
861 locator = self.plot_obj.date_axis_info
C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in _daily_finder(vmin, vmax, freq)
481 Period(ordinal=int(vmax), freq=freq))
482 span = vmax.ordinal - vmin.ordinal + 1
--> 483 dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq)
484 # Initialize the output
485 info = np.zeros(span,
C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in __new__(cls, data, ordinal, freq, start, end, periods, copy, name, tz, **kwargs)
186 else:
187 data, freq = cls._generate_range(start, end, periods,
--> 188 freq, kwargs)
189 else:
190 ordinal, freq = cls._from_arraylike(data, freq, tz)
C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in _generate_range(cls, start, end, periods, freq, fields)
200 raise ValueError('Can either instantiate from fields '
201 'or endpoints, but not both')
--> 202 subarr, freq = _get_ordinal_range(start, end, periods, freq)
203 elif field_count > 0:
204 subarr, freq = _range_from_fields(freq=freq, **fields)
C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in _get_ordinal_range(start, end, periods, freq, mult)
1026 dtype=np.int64)
1027 else:
-> 1028 data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
1029
1030 return data, freq
MemoryError:
What is the problem? I can't figure it out.
Thanks !
The issue originates from using a TimedeltaIndex (or timedelta values) for your time column. It was reported here: https://github.com/pydata/pandas/issues/8711
No fix has been provided yet.
As an alternative, I suggest converting your data to datetime values or a DatetimeIndex. Let's say YourDate contains the starting date of your observations.
df9.index = pd.DatetimeIndex(pd.datetime.strptime(YourDate,'%d.%m.%Y %H:%M:%S')
+df9['Time'])
df9.plot(y='Pressure mean')
Note that it will plot only the hours if you have less than 24 hours.
EDIT (2016-11-07):
I can now use timedelta as index and plot correctly. This is how I proceed (assuming I have float numbers indicating hours):
converter = {'Time[h]' : lambda x: pd.to_timedelta(float(x),unit='h')}#converts float to timedelta
df = pd.read_csv(fpath, sep='\t',
skiprows=len(comments),#header
names=dt.keys(),#you need of course your own dtype
dtype=dt,#you need of course your own dtype
encoding='latin-1',#European data...
skipinitialspace=True,
converters=converter)
df = df.set_index('Time[h]')#time column to index.
As Wli mentioned, this is a bug that has yet to be fixed. As a workaround, plotting the index and values directly with matplotlib worked for me:
plt.plot(s.index,s.values)

Seaborn FacetGrid user-defined plot function

In Seaborn, you can use FacetGrid to set up data-aware grids on which to plot. You can then use the map or map_dataframe methods to plot to those grids.
I am having trouble correctly specifying a user-defined plot function that works with map or map_dataframe. In this example I use the errorbar function, to which I want to pass the error values as a 2xN array-like. In my example (taken from @mwaskom's answer here) the errors are symmetrical -- but imagine I have a situation where they are not.
In [255]:
from scipy import stats
tips_all = sns.load_dataset("tips")
tips_grouped = tips_all.groupby(["smoker", "size"])
tips = tips_grouped.mean()
tips["error_min"] = tips_grouped.total_bill.apply(stats.sem) * 1.96
tips["error_max"] = tips_grouped.total_bill.apply(stats.sem) * 1.96
tips.reset_index(inplace=True)
tips
Out[255]:
smoker size total_bill tip error_min error_max
0 No 1 8.660000 1.415000 2.763600 2.763600
1 No 2 15.342333 2.489000 0.919042 0.919042
2 No 3 21.009615 3.069231 2.680447 2.680447
3 No 4 27.769231 4.195769 3.303131 3.303131
4 No 5 30.576667 5.046667 11.620808 11.620808
5 No 6 34.830000 5.225000 9.194360 9.194360
6 Yes 1 5.825000 1.460000 5.399800 5.399800
7 Yes 2 17.955758 2.709545 1.805528 1.805528
8 Yes 3 28.191667 4.095000 6.898186 6.898186
9 Yes 4 30.609091 3.992727 5.150063 5.150063
10 Yes 5 29.305000 2.500000 2.263800 2.263800
I define my error bar function, which takes the data and indexes the error columns to produce the 2xN array:
In [256]:
def my_errorbar(*args, **kwargs):
data = kwargs['data']
errors = np.vstack([data['error_min'],
data['error_max']])
print(errors)
plt.errorbar(data[args[0]],
data[args[1]],
yerr=errors,
**kwargs);
Call using map_dataframe (because my function gets the data as a kwarg):
In [257]:
g = sns.FacetGrid(tips, col="smoker", size=5)
g.map_dataframe(my_errorbar, "size", "total_bill", marker="o")
[[ 2.7636 0.9190424 2.68044722 3.30313068 11.62080751
9.19436049]
[ 2.7636 0.9190424 2.68044722 3.30313068 11.62080751
9.19436049]]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-257-dc8b35ec70ec> in <module>()
1 g = sns.FacetGrid(tips, col="smoker", size=5)
----> 2 g.map_dataframe(my_errorbar, "size", "total_bill", marker="o")
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/seaborn/axisgrid.py in map_dataframe(self, func, *args, **kwargs)
509
510 # Draw the plot
--> 511 self._facet_plot(func, ax, args, kwargs)
512
513 # Finalize the annotations and layout
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/seaborn/axisgrid.py in _facet_plot(self, func, ax, plot_args, plot_kwargs)
527
528 # Draw the plot
--> 529 func(*plot_args, **plot_kwargs)
530
531 # Sort out the supporting information
<ipython-input-256-62202c841233> in my_errorbar(*args, **kwargs)
9 data[args[1]],
10 yerr=errors,
---> 11 **kwargs);
12
13
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/pyplot.py in errorbar(x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, hold, **kwargs)
2764 barsabove=barsabove, lolims=lolims, uplims=uplims,
2765 xlolims=xlolims, xuplims=xuplims,
-> 2766 errorevery=errorevery, capthick=capthick, **kwargs)
2767 draw_if_interactive()
2768 finally:
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_axes.py in errorbar(self, x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, **kwargs)
2859
2860 if not barsabove and plot_line:
-> 2861 l0, = self.plot(x, y, fmt, **kwargs)
2862
2863 if ecolor is None:
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_axes.py in plot(self, *args, **kwargs)
1371 lines = []
1372
-> 1373 for line in self._get_lines(*args, **kwargs):
1374 self.add_line(line)
1375 lines.append(line)
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in _grab_next_args(self, *args, **kwargs)
302 return
303 if len(remaining) <= 3:
--> 304 for seg in self._plot_args(remaining, kwargs):
305 yield seg
306 return
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in _plot_args(self, tup, kwargs)
290 ncx, ncy = x.shape[1], y.shape[1]
291 for j in xrange(max(ncx, ncy)):
--> 292 seg = func(x[:, j % ncx], y[:, j % ncy], kw, kwargs)
293 ret.append(seg)
294 return ret
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in _makeline(self, x, y, kw, kwargs)
242 **kw
243 )
--> 244 self.set_lineprops(seg, **kwargs)
245 return seg
246
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in set_lineprops(self, line, **kwargs)
184 raise TypeError('There is no line property "%s"' % key)
185 func = getattr(line, funcName)
--> 186 func(val)
187
188 def set_patchprops(self, fill_poly, **kwargs):
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/lines.py in set_data(self, *args)
557 """
558 if len(args) == 1:
--> 559 x, y = args[0]
560 else:
561 x, y = args
ValueError: too many values to unpack (expected 2)
I don't understand the reason for the failure here. Note that the plot function gets something, because a plot of the first grid of errorbars is produced. I assume I'm not passing the **kwargs dictionary on correctly.
In general, I would find it really helpful if the tutorial for Seaborn contained one or two examples of user-defined plot functions passed to map or map_dataframe.
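This is @mwaskom's answer, and it works a treat (see comments):
Just change the my_errorbar function so that it pops the data out of the keyword dict. Otherwise 'data' is still in kwargs when they are forwarded to plt.errorbar, which hands it on to the line as a property (Line2D.set_data in the traceback), and that raises the ValueError: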
def my_errorbar(*args, **kwargs):
data = kwargs.pop('data')
errors = np.vstack([data['error_min'],
data['error_max']])
print(errors)
plt.errorbar(data[args[0]],
data[args[1]],
yerr=errors,
**kwargs);

Plotting dataframe raises error of ordinal value must be >= 1

I follow the tutorial at http://nbviewer.ipython.org/github/jvns/pandas-cookbook/blob/v0.1/cookbook/Chapter%205%20-%20Combining%20dataframes%20and%20scraping%20Canadian%20weather%20data.ipynb
I have a pandas dataframe
weather_mar2012['Temp (°C)']
Out[30]:
Date/Time
2012-03-01 00:00:00 -5.5
2012-03-01 01:00:00 -5.7
2012-03-01 02:00:00 -5.4
When trying to plot it, I get an error:
weather_mar2012['Temp (°C)'].plot(figsize=(15, 5))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last) <ipython-input-31-21c79ba7d5ef> in <module>()
----> 1 weather_mar2012['Temp (°C)'].plot(figsize=(15, 5))
/home/vagrant/anaconda3/lib/python3.4/site-packages/pandas/tools/plotting.py in plot_series(data, kind, ax, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, label, secondary_y, **kwds) 2486 yerr=yerr, xerr=xerr, 2487 label=label, secondary_y=secondary_y,
-> 2488 **kwds) 2489 2490
/home/vagrant/anaconda3/lib/python3.4/site-packages/pandas/tools/plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds) 2292 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) 2293
-> 2294 plot_obj.generate() 2295 plot_obj.draw() 2296 return plot_obj.result
/home/vagrant/anaconda3/lib/python3.4/site-packages/pandas/tools/plotting.py in generate(self)
922 self._make_legend()
923 self._post_plot_logic()
--> 924 self._adorn_subplots()
925
926 def _args_adjust(self):
/home/vagrant/anaconda3/lib/python3.4/site-packages/pandas/tools/plotting.py in _adorn_subplots(self) 1052 ax.set_xticklabels(xticklabels) 1053 self._apply_axis_properties(ax.xaxis, rot=self.rot,
-> 1054 fontsize=self.fontsize) 1055 elif self.orientation == 'horizontal': 1056 if self._need_to_set_index:
/home/vagrant/anaconda3/lib/python3.4/site-packages/pandas/tools/plotting.py in _apply_axis_properties(self, axis, rot, fontsize) 1061 1062 def _apply_axis_properties(self, axis, rot=None, fontsize=None):
-> 1063 labels = axis.get_majorticklabels() + axis.get_minorticklabels() 1064 for label in labels: 1065 if rot is not None:
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/axis.py in get_majorticklabels(self) 1155 def get_majorticklabels(self): 1156 'Return a list of Text instances for the major ticklabels'
-> 1157 ticks = self.get_major_ticks() 1158 labels1 = [tick.label1 for tick in ticks if tick.label1On] 1159 labels2 = [tick.label2 for tick in ticks if tick.label2On]
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/axis.py in get_major_ticks(self, numticks) 1284 'get the tick instances; grow as necessary' 1285 if numticks is None:
-> 1286 numticks = len(self.get_major_locator()()) 1287 if len(self.majorTicks) < numticks: 1288 # update the new tick label properties from the old
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/dates.py in __call__(self)
863 def __call__(self):
864 'Return the locations of the ticks'
--> 865 self.refresh()
866 return self._locator()
867
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/dates.py in refresh(self)
880 def refresh(self):
881 'Refresh internal information based on current limits.'
--> 882 dmin, dmax = self.viewlim_to_dt()
883 self._locator = self.get_locator(dmin, dmax)
884
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/dates.py in viewlim_to_dt(self)
624 def viewlim_to_dt(self):
625 vmin, vmax = self.axis.get_view_interval()
--> 626 return num2date(vmin, self.tz), num2date(vmax, self.tz)
627
628 def _get_unit(self):
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/dates.py in num2date(x, tz)
343 tz = _get_rc_timezone()
344 if not cbook.iterable(x):
--> 345 return _from_ordinalf(x, tz)
346 else:
347 x = np.asarray(x)
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/dates.py in _from_ordinalf(x, tz)
223 tz = _get_rc_timezone()
224 ix = int(x)
--> 225 dt = datetime.datetime.fromordinal(ix)
226 remainder = float(x) - ix
227 hour, remainder = divmod(24 * remainder, 1)
ValueError: ordinal must be >= 1
What does it mean?
How can I fix this?
I was getting this error in IPython even with the current pandas 0.20.3.
I traced it down to having run a script beforehand which saved a figure with a different index but never called plt.show(), as the figure had been saved and I didn't need to see it.
So, as @naught101 hinted, forcing plt.close('all') before showing the next figure fixes the issue. It is probably good practice at the end of scripts anyway.
This was a bug in pandas 0.18.1 that was fixed in 0.19.2; e.g. run conda upgrade pandas.

Bar chart in pandas on time series data

I am trying to draw a bar chart in pandas from time series data.
The documentation says it is not possible: http://pandas.pydata.org/pandas-docs/stable/visualization.html#bar-plots
Is there a workaround?
This is my code
# there must be ORDER BY, other wise rows will not be ordered
df = sql.read_frame("SELECT * FROM hzmo_report ORDER BY datum;", cnx, index_col='datum')
df.index = pd.to_datetime(df.index) # converting to DatetimeIndex
df['korisnika'].plot(ax=axs1[0], title='SOMETHING', marker='o')
df['korisnika'].diff().plot(ax=axs1[1], title='SOMETHING', marker='o') # i would like this to be bar plot
If I do
df['korisnika'].diff().plot(kind='bar', ax=axs1[1], title='SOMETHING', marker='o')
(I have just added kind='bar'.)
I get:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-109-d41eb2b2e3a7> in <module>()
36 fig1.suptitle('Umirovljenici', fontsize=16)
37 df['korisnika'].plot(ax=axs1[0], title='Broj korisnika mirovine', marker='o')
---> 38 ( df['korisnika'].diff() ).plot(ax=axs1[1], kind='bar', title='Apsolutna razlika naspram prethodnog mjeseca', marker='o')
39 #df['korisnika'].diff().hist()
40
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\pandas\tools\plotting.pyc in plot_series(series, label, kind, use_index, rot, xticks, yticks, xlim, ylim, ax, style, grid, legend, logy, secondary_y, **kwds)
1504 secondary_y=secondary_y, **kwds)
1505
-> 1506 plot_obj.generate()
1507 plot_obj.draw()
1508
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\pandas\tools\plotting.pyc in generate(self)
731 self._compute_plot_data()
732 self._setup_subplots()
--> 733 self._make_plot()
734 self._post_plot_logic()
735 self._adorn_subplots()
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\pandas\tools\plotting.pyc in _make_plot(self)
1291 else:
1292 rect = bar_f(ax, self.ax_pos + i * 0.75 / K, y, 0.75 / K,
-> 1293 start=pos_prior, label=label, **kwds)
1294 rects.append(rect)
1295 labels.append(label)
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\pandas\tools\plotting.pyc in f(ax, x, y, w, start, **kwds)
1251 if self.kind == 'bar':
1252 def f(ax, x, y, w, start=None, **kwds):
-> 1253 return ax.bar(x, y, w, bottom=start, **kwds)
1254 elif self.kind == 'barh':
1255 def f(ax, x, y, w, start=None, **kwds):
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\matplotlib\axes.pyc in bar(self, left, height, width, bottom, **kwargs)
4779 label='_nolegend_'
4780 )
-> 4781 r.update(kwargs)
4782 r.get_path()._interpolation_steps = 100
4783 #print r.get_label(), label, 'label' in kwargs
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\matplotlib\artist.pyc in update(self, props)
657 func = getattr(self, 'set_'+k, None)
658 if func is None or not callable(func):
--> 659 raise AttributeError('Unknown property %s'%k)
660 func(v)
661 changed = True
AttributeError: Unknown property marker
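You can plot a bar plot of a time series, though it is not that useful IMHO. Note that the AttributeError in the question comes from marker='o': the traceback shows it being applied to the bar, which has no such property, so drop that argument when using kind='bar'.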
ts = Series(randn(20),date_range('20130101',periods=20))
ts.plot()
A time-series line-plot
A Bar Plot

Categories