Seaborn FacetGrid user-defined plot function - python

In Seaborn, you can use FacetGrid to set up data-aware grids on which to plot. You can then use the map or map_dataframe methods to plot to those grids.
I am having trouble correctly specifying a user-defined plot function that works with map or map_dataframe. In this example I use the errorbar function, to which I want to pass the error values as a 2xN array-like. In my example (taken from @mwaskom's answer here) the errors are symmetrical -- but imagine I have a situation where they are not.
In [255]:
from scipy import stats
tips_all = sns.load_dataset("tips")
tips_grouped = tips_all.groupby(["smoker", "size"])
tips = tips_grouped.mean()
tips["error_min"] = tips_grouped.total_bill.apply(stats.sem) * 1.96
tips["error_max"] = tips_grouped.total_bill.apply(stats.sem) * 1.96
tips.reset_index(inplace=True)
tips
Out[255]:
smoker size total_bill tip error_min error_max
0 No 1 8.660000 1.415000 2.763600 2.763600
1 No 2 15.342333 2.489000 0.919042 0.919042
2 No 3 21.009615 3.069231 2.680447 2.680447
3 No 4 27.769231 4.195769 3.303131 3.303131
4 No 5 30.576667 5.046667 11.620808 11.620808
5 No 6 34.830000 5.225000 9.194360 9.194360
6 Yes 1 5.825000 1.460000 5.399800 5.399800
7 Yes 2 17.955758 2.709545 1.805528 1.805528
8 Yes 3 28.191667 4.095000 6.898186 6.898186
9 Yes 4 30.609091 3.992727 5.150063 5.150063
10 Yes 5 29.305000 2.500000 2.263800 2.263800
Define my error bar function, that takes data and indexes the error columns to produce the 2xN array:
In [256]:
def my_errorbar(*args, **kwargs):
data = kwargs['data']
errors = np.vstack([data['error_min'],
data['error_max']])
print(errors)
plt.errorbar(data[args[0]],
data[args[1]],
yerr=errors,
**kwargs);
Call using map_dataframe (because my function gets the data as a kwarg):
In [257]:
g = sns.FacetGrid(tips, col="smoker", size=5)
g.map_dataframe(my_errorbar, "size", "total_bill", marker="o")
[[ 2.7636 0.9190424 2.68044722 3.30313068 11.62080751
9.19436049]
[ 2.7636 0.9190424 2.68044722 3.30313068 11.62080751
9.19436049]]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-257-dc8b35ec70ec> in <module>()
1 g = sns.FacetGrid(tips, col="smoker", size=5)
----> 2 g.map_dataframe(my_errorbar, "size", "total_bill", marker="o")
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/seaborn/axisgrid.py in map_dataframe(self, func, *args, **kwargs)
509
510 # Draw the plot
--> 511 self._facet_plot(func, ax, args, kwargs)
512
513 # Finalize the annotations and layout
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/seaborn/axisgrid.py in _facet_plot(self, func, ax, plot_args, plot_kwargs)
527
528 # Draw the plot
--> 529 func(*plot_args, **plot_kwargs)
530
531 # Sort out the supporting information
<ipython-input-256-62202c841233> in my_errorbar(*args, **kwargs)
9 data[args[1]],
10 yerr=errors,
---> 11 **kwargs);
12
13
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/pyplot.py in errorbar(x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, hold, **kwargs)
2764 barsabove=barsabove, lolims=lolims, uplims=uplims,
2765 xlolims=xlolims, xuplims=xuplims,
-> 2766 errorevery=errorevery, capthick=capthick, **kwargs)
2767 draw_if_interactive()
2768 finally:
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_axes.py in errorbar(self, x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, **kwargs)
2859
2860 if not barsabove and plot_line:
-> 2861 l0, = self.plot(x, y, fmt, **kwargs)
2862
2863 if ecolor is None:
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_axes.py in plot(self, *args, **kwargs)
1371 lines = []
1372
-> 1373 for line in self._get_lines(*args, **kwargs):
1374 self.add_line(line)
1375 lines.append(line)
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in _grab_next_args(self, *args, **kwargs)
302 return
303 if len(remaining) <= 3:
--> 304 for seg in self._plot_args(remaining, kwargs):
305 yield seg
306 return
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in _plot_args(self, tup, kwargs)
290 ncx, ncy = x.shape[1], y.shape[1]
291 for j in xrange(max(ncx, ncy)):
--> 292 seg = func(x[:, j % ncx], y[:, j % ncy], kw, kwargs)
293 ret.append(seg)
294 return ret
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in _makeline(self, x, y, kw, kwargs)
242 **kw
243 )
--> 244 self.set_lineprops(seg, **kwargs)
245 return seg
246
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in set_lineprops(self, line, **kwargs)
184 raise TypeError('There is no line property "%s"' % key)
185 func = getattr(line, funcName)
--> 186 func(val)
187
188 def set_patchprops(self, fill_poly, **kwargs):
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/lines.py in set_data(self, *args)
557 """
558 if len(args) == 1:
--> 559 x, y = args[0]
560 else:
561 x, y = args
ValueError: too many values to unpack (expected 2)
I don't understand the reason for the failure here. Note that the plot function gets something, because a plot of the first grid of errorbars is produced. I assume I'm not passing the **kwargs dictionary on correctly.
In general, I would find it really helpful if the tutorial for Seaborn contained one or two examples of user-defined plot functions passed to map or map_dataframe.

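This is @mwaskom's answer, and it works a treat (see comments):
Just change the my_errorbar function so that it pops the data out of the keyword dict. Otherwise the 'data' entry is still in kwargs when they are forwarded to plt.errorbar, which passes it on as a line property and ends up calling Line2D.set_data with the whole DataFrame -- hence the "too many values to unpack" error: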
def my_errorbar(*args, **kwargs):
    """Draw error bars with separate lower/upper errors for one facet.

    Intended for use with ``FacetGrid.map_dataframe``: ``args[0]`` and
    ``args[1]`` are the x and y column names, and ``kwargs['data']`` is the
    facet's DataFrame, which must contain ``error_min`` and ``error_max``
    columns. All remaining keyword arguments are forwarded to
    ``plt.errorbar``.
    """
    # Pop (rather than just read) ``data`` so that it is not forwarded to
    # ``plt.errorbar`` together with the styling keywords.
    data = kwargs.pop('data')
    # Stack the lower and upper errors into the 2xN array passed as ``yerr``.
    errors = np.vstack([data['error_min'],
                        data['error_max']])
    print(errors)
    plt.errorbar(data[args[0]],
                 data[args[1]],
                 yerr=errors,
                 **kwargs)

Related

How could I solve these errors below?

I'm new to Python and I'm trying to run a piece of code that I found at the link below:
http://benalexkeen.com/gradient-boosting-in-python-using-scikit-learn/
When I run the first two snippets I get a bunch of errors. Could anyone please help me correct them? I have my own data and I am trying to plot it the way these two snippets do.
This is the code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import ensemble
from sklearn import linear_model
# Mock data
x = np.arange(0, 60)
y = map(lambda x: x / 2 + (x // 10) % 2 * 20 * x / 5 + np.random.random() * 10, x)
x = pd.DataFrame({'x': x})
# Plot mock data
plt.figure(figsize=(10, 5))
plt.scatter(x, y)
plt.show()
I got the error below:
RuntimeError Traceback (most recent call last)
<ipython-input-2-7f1d946a4092> in <module>
6 # Plot mock data
7 plt.figure(figsize=(10, 5))
----> 8 plt.scatter(x, y)
9 plt.show()
~\Anaconda3\lib\site-packages\matplotlib\pyplot.py in scatter(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, data, **kwargs)
2862 vmin=vmin, vmax=vmax, alpha=alpha, linewidths=linewidths,
2863 verts=verts, edgecolors=edgecolors, **({"data": data} if data
-> 2864 is not None else {}), **kwargs)
2865 sci(__ret)
2866 return __ret
~\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, data, *args, **kwargs)
1808 "the Matplotlib list!)" % (label_namer, func.__name__),
1809 RuntimeWarning, stacklevel=2)
-> 1810 return func(ax, *args, **kwargs)
1811
1812 inner.__doc__ = _add_data_doc(inner.__doc__,
~\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, **kwargs)
4170 edgecolors = 'face'
4171
-> 4172 self._process_unit_info(xdata=x, ydata=y, kwargs=kwargs)
4173 x = self.convert_xunits(x)
4174 y = self.convert_yunits(y)
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _process_unit_info(self, xdata, ydata, kwargs)
2134
2135 kwargs = _process_single_axis(xdata, self.xaxis, 'xunits', kwargs)
-> 2136 kwargs = _process_single_axis(ydata, self.yaxis, 'yunits', kwargs)
2137 return kwargs
2138
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _process_single_axis(data, axis, unit_name, kwargs)
2116 # We only need to update if there is nothing set yet.
2117 if not axis.have_units():
-> 2118 axis.update_units(data)
2119
2120 # Check for units in the kwargs, and if present update axis
~\Anaconda3\lib\site-packages\matplotlib\axis.py in update_units(self, data)
1465 """
1466
-> 1467 converter = munits.registry.get_converter(data)
1468 if converter is None:
1469 return False
~\Anaconda3\lib\site-packages\matplotlib\units.py in get_converter(self, x)
185 if converter is None:
186 try:
--> 187 thisx = safe_first_element(x)
188 except (TypeError, StopIteration):
189 pass
~\Anaconda3\lib\site-packages\matplotlib\cbook\__init__.py in safe_first_element(obj)
1633 except TypeError:
1634 pass
-> 1635 raise RuntimeError("matplotlib does not support generators "
1636 "as input")
1637 return next(iter(obj))
RuntimeError: matplotlib does not support generators as input
The result I'm expecting is shown in the picture below.
Replace plt.scatter(x, y) with plt.scatter(x, list(y)).
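In Python 3, map returns a lazy iterator rather than a list, and matplotlib rejects it with the "does not support generators as input" error; it needs a list (or array) here. That worked for me on Python 3.6.
Alternatively, convert the map object to a list when you create it, because in Python 3 map returns an iterator: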
y = list(map(lambda x: x / 2 + (x // 10) % 2 * 20 * x / 5 + np.random.random() * 10, x))

Adding legend to matplotlib scatterplot

I am following the Randy Olson approach to make beautiful time trends graphs (see here).
When I plot the following code:
tableau20 = [(31, 119, 180), (174, 199, 232)]
for i in range(len(tableau20)):
r, g, b = tableau20[i]
tableau20[i] = (r / 255., g / 255., b / 255.)
plt.figure(figsize=(12, 14))
# Remove the plot frame lines. They are unnecessary chartjunk.
ax = plt.subplot(111)
ax.spines["top"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["left"].set_visible(False)
# Ensure that the axis ticks only show up on the bottom and left of the plot.
# Ticks on the right and top of the plot are generally unnecessary chartjunk.
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
majors = ['Number of findings (total)', 'Business Ethics Findings']
for rank, column in enumerate(majors):
plt.plot(monthly_df.argrc__End_Date__c.values, monthly_df[column.replace("\n", " ")].values,
lw=2.5, color=tableau20[rank])
y_pos = monthly_df[column.replace("\n", " ")].values[-1] - 0.5
if column == "Number of findings (total)":
y_pos += 0.5
elif column == 'Business Ethics Findings':
y_pos -= 0.5
plt.text(2018.1, y_pos, column, fontsize=12, color=tableau20[rank])
I get this error:
If I eliminate
plt.text(2016, y_pos, column, fontsize=12, color=tableau20[rank])
the two lines are plotted on the graph correctly, but with no legend. How do I show the names of my columns?
EDIT
I am adding here the traceback for further information. I hope this is helpful.
C:\Users\filippo.sebastio\Anaconda3\lib\site-packages\matplotlib\cbook\deprecation.py:107: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance. In a future version, a new instance will always be created and returned. Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.
warnings.warn(message, mplDeprecation, stacklevel=1)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
339 pass
340 else:
--> 341 return printer(obj)
342 # Finally look for special method names
343 method = get_real_method(obj, self.print_method)
~\Anaconda3\lib\site-packages\IPython\core\pylabtools.py in <lambda>(fig)
239
240 if 'png' in formats:
--> 241 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
242 if 'retina' in formats or 'png2x' in formats:
243 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
~\Anaconda3\lib\site-packages\IPython\core\pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
123
124 bytes_io = BytesIO()
--> 125 fig.canvas.print_figure(bytes_io, **kw)
126 data = bytes_io.getvalue()
127 if fmt == 'svg':
~\Anaconda3\lib\site-packages\matplotlib\backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs)
2261 orientation=orientation,
2262 bbox_inches_restore=_bbox_inches_restore,
-> 2263 **kwargs)
2264 finally:
2265 if bbox_inches and restore_bbox:
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in print_png(self, filename_or_obj, *args, **kwargs)
515
516 def print_png(self, filename_or_obj, *args, **kwargs):
--> 517 FigureCanvasAgg.draw(self)
518 renderer = self.get_renderer()
519 original_dpi = renderer.dpi
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in draw(self)
427 Draw the figure using the renderer
428 """
--> 429 self.renderer = self.get_renderer(cleared=True)
430 # acquire a lock on the shared font cache
431 RendererAgg.lock.acquire()
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in get_renderer(self, cleared)
452
453 if need_new_renderer:
--> 454 self.renderer = RendererAgg(w, h, self.figure.dpi)
455 self._lastKey = key
456 elif cleared:
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in __init__(self, width, height, dpi)
99 self.width = width
100 self.height = height
--> 101 self._renderer = _RendererAgg(int(width), int(height), dpi)
102 self._filter_renderers = []
103
ValueError: Image size of 312943x821 pixels is too large. It must be less than 2^16 in each direction.
<Figure size 864x1008 with 1 Axes>

Jupyter Seaborn sns.lmplot Error

I am having trouble plotting an lmplot in Jupyter.
Below you can see the first rows of my data; the rest of the data follows the same pattern.
Year_of_Release Platform_General Platform counts Global_Sales(M#) GS_Amount/Game
1994.0 Sony_Playstation PS 1 1.27 1.270000
1996.0 Sony_Playstation PS 5 17.48 3.496000
1997.0 Sony_Playstation PS 12 30.89 2.574167
1998.0 Sony_Playstation PS 22 43.08 1.958182
1999.0 Sony_Playstation PS 25 49.02 1.960800
Below is the code that should draw the plot, but it fails:
# Make a custom palette with platform colors
pal = dict(Sony_Playstation="#6495ED",Microsoft_Xbox="#F08080",Nintendo="Green")
# Show the survival proability as a function of platforms
g = sns.lmplot(x="Year_of_Release", y="GS_Amount/Game", col="Platform_General", hue="Platform_General",
data=vgs_df_pf_grouped,palette=pal, y_jitter=.02, logistic=True)
# Use more informative axis labels than are provided by default
g.set_axis_labels("Year of Release", "Sales per Game Released (M #)")
When I run this code, the long error message below appears, and because I am new to data analytics I can't understand what is wrong with the data or the code.
I would appreciate some help with this one. Thank you.
ValueError Traceback (most recent call last)
<ipython-input-68-c78b6ba34d96> in <module>()
5 # Show the survival proability as a function of age and sex
6 g = sns.lmplot(x="Year_of_Release", y="GS_Amount/Game",col="Platform_General", hue="Platform_General",
----> 7 data=vgs_df_pf_grouped,palette=pal, y_jitter=.02, logistic=True)
8
9 # Use more informative axis labels than are provided by default
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in lmplot(x, y, data, hue, col, row, palette, col_wrap, size, aspect, markers, sharex, sharey, hue_order, col_order, row_order, legend, legend_out, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, x_jitter, y_jitter, scatter_kws, line_kws)
588 scatter_kws=scatter_kws, line_kws=line_kws,
589 )
--> 590 facets.map_dataframe(regplot, x, y, **regplot_kws)
591
592 # Add a legend
~/anaconda3/lib/python3.6/site-packages/seaborn/axisgrid.py in map_dataframe(self, func, *args, **kwargs)
795
796 # Draw the plot
--> 797 self._facet_plot(func, ax, args, kwargs)
798
799 # Finalize the annotations and layout
~/anaconda3/lib/python3.6/site-packages/seaborn/axisgrid.py in _facet_plot(self, func, ax, plot_args, plot_kwargs)
813
814 # Draw the plot
--> 815 func(*plot_args, **plot_kwargs)
816
817 # Sort out the supporting information
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in regplot(x, y, data, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, dropna, x_jitter, y_jitter, label, color, marker, scatter_kws, line_kws, ax)
788 scatter_kws["marker"] = marker
789 line_kws = {} if line_kws is None else copy.copy(line_kws)
--> 790 plotter.plot(ax, scatter_kws, line_kws)
791 return ax
792
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in plot(self, ax, scatter_kws, line_kws)
340 self.scatterplot(ax, scatter_kws)
341 if self.fit_reg:
--> 342 self.lineplot(ax, line_kws)
343
344 # Label the axes
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in lineplot(self, ax, kws)
385
386 # Fit the regression model
--> 387 grid, yhat, err_bands = self.fit_regression(ax)
388
389 # Get set default aesthetics
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in fit_regression(self, ax, x_range, grid)
198 from statsmodels.genmod.families import Binomial
199 yhat, yhat_boots = self.fit_statsmodels(grid, GLM,
--> 200 family=Binomial())
201 elif self.lowess:
202 ci = None
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in fit_statsmodels(self, grid, model, **kwargs)
258 return yhat
259
--> 260 yhat = reg_func(X, y)
261 if self.ci is None:
262 return yhat, None
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in reg_func(_x, _y)
252 def reg_func(_x, _y):
253 try:
--> 254 yhat = model(_y, _x, **kwargs).fit().predict(grid)
255 except glm.PerfectSeparationError:
256 yhat = np.empty(len(grid))
~/anaconda3/lib/python3.6/site-packages/statsmodels/genmod/generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, full_output, disp, max_start_irls, **kwargs)
901 return self._fit_irls(start_params=start_params, maxiter=maxiter,
902 tol=tol, scale=scale, cov_type=cov_type,
--> 903 cov_kwds=cov_kwds, use_t=use_t, **kwargs)
904 else:
905 return self._fit_gradient(start_params=start_params,
~/anaconda3/lib/python3.6/site-packages/statsmodels/genmod/generalized_linear_model.py in _fit_irls(self, start_params, maxiter, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
977 dev = self.family.deviance(self.endog, mu, self.freq_weights)
978 if np.isnan(dev):
--> 979 raise ValueError("The first guess on the deviance function "
980 "returned a nan. This could be a boundary "
981 " problem and should be reported.")
ValueError: The first guess on the deviance function returned a nan. This could be a boundary problem and should be reported.

matplotlib - Error passing line argument through **kwargs

I have a function that plots a line, something like this:
def tmp_plot(*args, **kwargs):
plt.plot([1,2,3,4,5],[1,2,3,4,5], *args, **kwargs)
and when I call it, passing line as a keyword argument like this:
tmp_plot(line = '-')
I get this error:
TypeError: set_lineprops() got multiple values for keyword argument 'line'
but it works fine with the color argument.
I'm using matplotlib 1.4.3 and python 2.7.7
Any clues?
You can see where Matplotlib adds its own line argument in the Traceback below. This means your own keyword argument is a duplicate of Matplotlib's own one in the set_lineprops call:
In [1]: import matplotlib.pyplot as plt
In [2]: plt.plot([1,2,3], [1,4,9], line='-')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-82-f298702afcfe> in <module>()
----> 1 plt.plot([1,2,3], [1,4,9], line='-')
/Users/xnx/anaconda/envs/py33/lib/python3.3/site-packages/matplotlib/pyplot.py in plot(*args, **kwargs)
2985 ax.hold(hold)
2986 try:
-> 2987 ret = ax.plot(*args, **kwargs)
2988 draw_if_interactive()
2989 finally:
/Users/xnx/anaconda/envs/py33/lib/python3.3/site-packages/matplotlib/axes.py in plot(self, *args, **kwargs)
4137 lines = []
4138
-> 4139 for line in self._get_lines(*args, **kwargs):
4140 self.add_line(line)
4141 lines.append(line)
/Users/xnx/anaconda/envs/py33/lib/python3.3/site-packages/matplotlib/axes.py in _grab_next_args(self, *args, **kwargs)
317 return
318 if len(remaining) <= 3:
--> 319 for seg in self._plot_args(remaining, kwargs):
320 yield seg
321 return
/Users/xnx/anaconda/envs/py33/lib/python3.3/site-packages/matplotlib/axes.py in _plot_args(self, tup, kwargs)
305 ncx, ncy = x.shape[1], y.shape[1]
306 for j in range(max(ncx, ncy)):
--> 307 seg = func(x[:, j % ncx], y[:, j % ncy], kw, kwargs)
308 ret.append(seg)
309 return ret
/Users/xnx/anaconda/envs/py33/lib/python3.3/site-packages/matplotlib/axes.py in _makeline(self, x, y, kw, kwargs)
257 **kw
258 )
--> 259 self.set_lineprops(seg, **kwargs)
260 return seg
261
TypeError: set_lineprops() got multiple values for argument 'line'
Perhaps you mean ls or linestyle instead of line in any case?
In [83]: plt.plot([1,2,3], [1,4,9], ls='-')
Out[83]: [<matplotlib.lines.Line2D at 0x10ed65610>]
I would guess that the internals of Matplotlib unpack an internal dictionary of parameters in addition to the caller-provided ones without stripping out duplicates, so both you and Matplotlib's internals end up providing a keyword parameter of the same name via two parallel routes.

Why do I get a KeyError while plotting a pandas data frame with matplotlib? [duplicate]

This question already has answers here:
KeyError when plotting a sliced pandas dataframe with datetimes
(3 answers)
Closed 7 years ago.
I have this data frame:
date_obj col1 col2 col3 col4
40038 2012-11-19 1.000 0.831856 0.986209 0.843919
40039 2012-11-20 2.015 0.521764 1.177320 0.938245
40040 2012-11-21 1.160 1.645345 1.964620 4.536440
40041 2012-11-22 3.171 2.444018 2.931550 3.737840
40042 2012-11-23 4.563 3.208111 3.587250 2.434040
40043 2012-11-24 5.379 3.863732 3.824540 1.634780
40044 2012-11-26 1.125 20.756739 4.162820 23.552100
40045 2012-11-27 3.340 5.369354 4.535090 1.129290
40046 2012-11-28 5.463 12.185730 8.102790 1.224300
40047 2012-11-29 6.596 14.328685 9.271000 24.655600
40048 2012-11-30 31.544 13.513497 12.103400 21.273500
40049 2012-12-01 24.921 26.144050 16.256200 13.883100
40050 2012-12-03 5.488 2.581351 7.220790 3.349450
40051 2012-12-04 6.977 5.893819 5.548870 2.948770
40052 2012-12-05 7.115 6.533022 5.863820 2.517030
40053 2012-12-06 5.842 8.754232 7.518660 1.447940
40054 2012-12-07 6.346 12.018631 10.263100 11.837400
40055 2012-12-08 17.666 4.548846 10.610400 11.110800
40056 2012-12-10 4.300 2.823566 1.475000 1.989210
40057 2012-12-11 2.415 2.436319 2.677440 2.908270
40058 2012-12-12 2.319 2.121092 3.455550 3.890480
40059 2012-12-13 1.000 1.633918 3.858540 4.316940
40060 2012-12-14 2.238 1.688475 5.065990 5.267850
40061 2012-12-15 1.798 2.621267 7.175370 6.957340
I try to plot it in the following way:
plt.figure(figsize=(17, 10))
plt.setp(plt.xticks()[1], rotation=45)
plt.plot_date(df_cut['date_obj'],df_cut['col1'], color='black', linestyle='-', markersize=3, linewidth=2)
plt.plot_date(df_cut['date_obj'],df_cut['col2'], color='red', linestyle='-', markersize=3)
plt.plot_date(df_cut['date_obj'],df_cut['col3'], color='green', linestyle='-', markersize=3)
plt.plot_date(df_cut['date_obj'],df_cut['col4'], color='blue', linestyle='-', markersize=3)
As a result I get an error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-544-1b8650d1e7e7> in <module>()
/ipython/local/lib/python2.7/site-packages/matplotlib/pyplot.pyc in plot_date(x, y, fmt, tz, xdate, ydate, hold, **kwargs)
2850 try:
2851 ret = ax.plot_date(x, y, fmt=fmt, tz=tz, xdate=xdate, ydate=ydate,
-> 2852 **kwargs)
2853 draw_if_interactive()
2854 finally:
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in plot_date(self, x, y, fmt, tz, xdate, ydate, **kwargs)
4061 if not self._hold: self.cla()
4062
-> 4063 ret = self.plot(x, y, fmt, **kwargs)
4064
4065 if xdate:
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in plot(self, *args, **kwargs)
3994 lines = []
3995
-> 3996 for line in self._get_lines(*args, **kwargs):
3997 self.add_line(line)
3998 lines.append(line)
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _grab_next_args(self, *args, **kwargs)
328 return
329 if len(remaining) <= 3:
--> 330 for seg in self._plot_args(remaining, kwargs):
331 yield seg
332 return
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _plot_args(self, tup, kwargs)
306 x = np.arange(y.shape[0], dtype=float)
307
--> 308 x, y = self._xy_from_xy(x, y)
309
310 if self.command == 'plot':
python/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _xy_from_xy(self, x, y)
222 def _xy_from_xy(self, x, y):
223 if self.axes.xaxis is not None and self.axes.yaxis is not None:
--> 224 bx = self.axes.xaxis.update_units(x)
225 by = self.axes.yaxis.update_units(y)
226
ipython/local/lib/python2.7/site-packages/matplotlib/axis.pyc in update_units(self, data)
1299 neednew = self.converter != converter
1300 self.converter = converter
-> 1301 default = self.converter.default_units(data, self)
1302 #print 'update units: default=%s, units=%s'%(default, self.units)
1303 if default is not None and self.units is None:
ipython/local/lib/python2.7/site-packages/matplotlib/dates.pyc in default_units(x, axis)
1156 'Return the tzinfo instance of *x* or of its first element, or None'
1157 try:
-> 1158 x = x[0]
1159 except (TypeError, IndexError):
1160 pass
ipython/local/lib/python2.7/site-packages/pandas/core/series.pyc in __getitem__(self, key)
611 def __getitem__(self, key):
612 try:
--> 613 return self.index.get_value(self, key)
614 except InvalidIndexError:
615 pass
ipython/local/lib/python2.7/site-packages/pandas/core/index.pyc in get_value(self, series, key)
761 """
762 try:
--> 763 return self._engine.get_value(series, key)
764 except KeyError, e1:
765 if len(self) > 0 and self.inferred_type == 'integer':
What is strange is that this code works for some data frames and not for others. The data frames do not differ in their structure; the only difference between them is in the values that they contain.
Could anybody please help me to resolve this problem?
DataFrames store dates as numpy.datetime64 objects, not Python datetime objects.
Furthermore, matplotlib's plot_date uses its own numeric representation of dates.
You could draw your data this way:
plt.plot_date(matplotlib.dates.date2num(pandas.to_datetime(df_cut['date_obj'].values)),df_cut['col1'].values, color='black', linestyle='-', markersize=3, linewidth=2)
Or you could define column 'date_obj' as the index of your data:
df0 = pd.DataFrame.from_records(YourDataSource, columns=['date_obj','col1','col2','col3','col4'],index='date_obj')
And then simply use pandas' plot() method:
df0['col1'].plot()

Categories