Memory error while plotting dataframe (matplotlib) - python

I'm using Pandas with Jupyter Notebook and trying to plot a small dataframe:
and when i'm inserting the following line:
df9.plot(x='Time', y='Pressure mean')
I'm getting the following error:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-8-c789b8162a1a> in <module>()
----> 1 df9.plot(x='Time', y='Pressure mean')
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
3735 fontsize=fontsize, colormap=colormap, table=table,
3736 yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 3737 sort_columns=sort_columns, **kwds)
3738 __call__.__doc__ = plot_frame.__doc__
3739
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2609 yerr=yerr, xerr=xerr,
2610 secondary_y=secondary_y, sort_columns=sort_columns,
-> 2611 **kwds)
2612
2613
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
2436 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
2437
-> 2438 plot_obj.generate()
2439 plot_obj.draw()
2440 return plot_obj.result
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in generate(self)
1029
1030 for ax in self.axes:
-> 1031 self._post_plot_logic_common(ax, self.data)
1032 self._post_plot_logic(ax, self.data)
1033
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _post_plot_logic_common(self, ax, data)
1157 ax.set_xticklabels(xticklabels)
1158 self._apply_axis_properties(ax.xaxis, rot=self.rot,
-> 1159 fontsize=self.fontsize)
1160 self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize)
1161 elif self.orientation == 'horizontal':
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _apply_axis_properties(self, axis, rot, fontsize)
1205
1206 def _apply_axis_properties(self, axis, rot=None, fontsize=None):
-> 1207 labels = axis.get_majorticklabels() + axis.get_minorticklabels()
1208 for label in labels:
1209 if rot is not None:
C:\Anaconda3\lib\site-packages\matplotlib\axis.py in get_majorticklabels(self)
1159 def get_majorticklabels(self):
1160 'Return a list of Text instances for the major ticklabels'
-> 1161 ticks = self.get_major_ticks()
1162 labels1 = [tick.label1 for tick in ticks if tick.label1On]
1163 labels2 = [tick.label2 for tick in ticks if tick.label2On]
C:\Anaconda3\lib\site-packages\matplotlib\axis.py in get_major_ticks(self, numticks)
1288 'get the tick instances; grow as necessary'
1289 if numticks is None:
-> 1290 numticks = len(self.get_major_locator()())
1291 if len(self.majorTicks) < numticks:
1292 # update the new tick label properties from the old
C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in __call__(self)
876 vmin, vmax = vmax, vmin
877 if self.isdynamic:
--> 878 locs = self._get_default_locs(vmin, vmax)
879 else: # pragma: no cover
880 base = self.base
C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in _get_default_locs(self, vmin, vmax)
857
858 if self.plot_obj.date_axis_info is None:
--> 859 self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq)
860
861 locator = self.plot_obj.date_axis_info
C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in _daily_finder(vmin, vmax, freq)
481 Period(ordinal=int(vmax), freq=freq))
482 span = vmax.ordinal - vmin.ordinal + 1
--> 483 dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq)
484 # Initialize the output
485 info = np.zeros(span,
C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in __new__(cls, data, ordinal, freq, start, end, periods, copy, name, tz, **kwargs)
186 else:
187 data, freq = cls._generate_range(start, end, periods,
--> 188 freq, kwargs)
189 else:
190 ordinal, freq = cls._from_arraylike(data, freq, tz)
C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in _generate_range(cls, start, end, periods, freq, fields)
200 raise ValueError('Can either instantiate from fields '
201 'or endpoints, but not both')
--> 202 subarr, freq = _get_ordinal_range(start, end, periods, freq)
203 elif field_count > 0:
204 subarr, freq = _range_from_fields(freq=freq, **fields)
C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in _get_ordinal_range(start, end, periods, freq, mult)
1026 dtype=np.int64)
1027 else:
-> 1028 data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
1029
1030 return data, freq
MemoryError:
What is the problem ? I can't figure it out.
Thanks !

The issue originates from using TimedeltaIndex (or timedelta) for your time column. It was reported there: https://github.com/pydata/pandas/issues/8711
No solution has been brought to us yet.
As an alternative solution, I invite you to convert your data to DateTime or DateTimeIndex. Let's say YourDate contains the starting date of your observations.
df9.index = pd.DatetimeIndex(pd.datetime.strptime(YourDate,'%d.%m.%Y %H:%M:%S')
+df9['Time'])
df9.plot(y='Pressure mean')
Note that it will plot only the hours if you have less than 24 hours.
EDIT (2016-11-07):
I can now use timedelta as index and plot correctly. This is how I proceed (assuming I have float numbers indicating hours):
converter = {'Time[h]' : lambda x: pd.to_timedelta(float(x),unit='h')}#converts float to timedelta
df = pd.read_csv(fpath, sep='\t',
skiprows=len(comments),#header
names=dt.keys(),#you need of course your own dtype
dtype=dt,#you need of course your own dtype
encoding='latin-1',#European data...
skipinitialspace=True,
converters=converter)
df = df.set_index('Time[h]')#time column to index.

As Wli mentioned, it is a bug still to be fixed. But as a workaround this worked for me. -
plt.plot(s.index,s.values)

Related

I have a problem when I try run a matplotlib command in Python

I have a dataset in python, that is operating is ok, but when I try visualize in matplotlib continuously get a mistake.
I type a viz.plot() command and this is:
This message is.:
TypeError Traceback (most recent call last)
<ipython-input-18-7608c70ebd6e> in <module>
----> 1 viz.plot()
/usr/lib/python3/dist-packages/pandas/plotting/_core.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2939 fontsize=fontsize, colormap=colormap, table=table,
2940 yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 2941 sort_columns=sort_columns, **kwds)
2942 __call__.__doc__ = plot_frame.__doc__
2943
/usr/lib/python3/dist-packages/pandas/plotting/_core.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
1975 yerr=yerr, xerr=xerr,
1976 secondary_y=secondary_y, sort_columns=sort_columns,
-> 1977 **kwds)
1978
1979
/usr/lib/python3/dist-packages/pandas/plotting/_core.py in _plot(data, x, y, subplots, ax, kind, **kwds)
1802 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
1803
-> 1804 plot_obj.generate()
1805 plot_obj.draw()
1806 return plot_obj.result
/usr/lib/python3/dist-packages/pandas/plotting/_core.py in generate(self)
256 def generate(self):
257 self._args_adjust()
--> 258 self._compute_plot_data()
259 self._setup_subplots()
260 self._make_plot()
/usr/lib/python3/dist-packages/pandas/plotting/_core.py in _compute_plot_data(self)
361 "datetime",
362 "datetimetz",
--> 363 "timedelta"])
364
365 try:
/usr/lib/python3/dist-packages/pandas/core/frame.py in select_dtypes(self, include, exclude)
3075 # the "union" of the logic of case 1 and case 2:
3076 # we get the included and excluded, and return their logical and
-> 3077 include_these = Series(not bool(include), index=self.columns)
3078 exclude_these = Series(not bool(exclude), index=self.columns)
3079
/usr/lib/python3/dist-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
273 else:
274 data = _sanitize_array(data, index, dtype, copy,
--> 275 raise_cast_failure=True)
276
277 data = SingleBlockManager(data, index, fastpath=True)
/usr/lib/python3/dist-packages/pandas/core/series.py in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
4147
4148 subarr = construct_1d_arraylike_from_scalar(
-> 4149 value, len(index), dtype)
4150
4151 else:
/usr/lib/python3/dist-packages/pandas/core/dtypes/cast.py in construct_1d_arraylike_from_scalar(value, length, dtype)
1199 if is_integer_dtype(dtype) and isna(value):
1200 dtype = np.float64
-> 1201 subarr = np.empty(length, dtype=dtype)
1202 subarr.fill(value)
1203
TypeError: Cannot interpret '<attribute 'dtype' of 'numpy.generic' objects>' as a data type
Does anyone know what the solution might be??
Thank you very much in advance for your response.:
Gabor-Gabor
I suppose "viz" is your dataframe.
To make use of plot() you need to give 2 parameters: plt.plot(x,y) or plt.plot(x,y,viz)

Adding legend to matplotlib scatterplot

I am following the Randy Olson approach to make beautiful time trends graphs (see here).
When I plot the following code:
tableau20 = [(31, 119, 180), (174, 199, 232)]
for i in range(len(tableau20)):
r, g, b = tableau20[i]
tableau20[i] = (r / 255., g / 255., b / 255.)
plt.figure(figsize=(12, 14))
# Remove the plot frame lines. They are unnecessary chartjunk.
ax = plt.subplot(111)
ax.spines["top"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["left"].set_visible(False)
# Ensure that the axis ticks only show up on the bottom and left of the plot.
# Ticks on the right and top of the plot are generally unnecessary chartjunk.
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
majors = ['Number of findings (total)', 'Business Ethics Findings']
for rank, column in enumerate(majors):
plt.plot(monthly_df.argrc__End_Date__c.values, monthly_df[column.replace("\n", " ")].values,
lw=2.5, color=tableau20[rank])
y_pos = monthly_df[column.replace("\n", " ")].values[-1] - 0.5
if column == "Number of findings (total)":
y_pos += 0.5
elif column == 'Business Ethics Findings':
y_pos -= 0.5
plt.text(2018.1, y_pos, column, fontsize=12, color=tableau20[rank])
I get this error:
If I eliminate
plt.text(2016, y_pos, column, fontsize=12, color=tableau20[rank])
Instead I get the two lines plotted on the graph correctly, but with no legend. How do I show the names of my columns?
EDIT
I am adding here the traceback for further information. I hope this is helpful.
C:\Users\filippo.sebastio\Anaconda3\lib\site-packages\matplotlib\cbook\deprecation.py:107: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance. In a future version, a new instance will always be created and returned. Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.
warnings.warn(message, mplDeprecation, stacklevel=1)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
339 pass
340 else:
--> 341 return printer(obj)
342 # Finally look for special method names
343 method = get_real_method(obj, self.print_method)
~\Anaconda3\lib\site-packages\IPython\core\pylabtools.py in <lambda>(fig)
239
240 if 'png' in formats:
--> 241 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
242 if 'retina' in formats or 'png2x' in formats:
243 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
~\Anaconda3\lib\site-packages\IPython\core\pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
123
124 bytes_io = BytesIO()
--> 125 fig.canvas.print_figure(bytes_io, **kw)
126 data = bytes_io.getvalue()
127 if fmt == 'svg':
~\Anaconda3\lib\site-packages\matplotlib\backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs)
2261 orientation=orientation,
2262 bbox_inches_restore=_bbox_inches_restore,
-> 2263 **kwargs)
2264 finally:
2265 if bbox_inches and restore_bbox:
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in print_png(self, filename_or_obj, *args, **kwargs)
515
516 def print_png(self, filename_or_obj, *args, **kwargs):
--> 517 FigureCanvasAgg.draw(self)
518 renderer = self.get_renderer()
519 original_dpi = renderer.dpi
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in draw(self)
427 Draw the figure using the renderer
428 """
--> 429 self.renderer = self.get_renderer(cleared=True)
430 # acquire a lock on the shared font cache
431 RendererAgg.lock.acquire()
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in get_renderer(self, cleared)
452
453 if need_new_renderer:
--> 454 self.renderer = RendererAgg(w, h, self.figure.dpi)
455 self._lastKey = key
456 elif cleared:
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in __init__(self, width, height, dpi)
99 self.width = width
100 self.height = height
--> 101 self._renderer = _RendererAgg(int(width), int(height), dpi)
102 self._filter_renderers = []
103
ValueError: Image size of 312943x821 pixels is too large. It must be less than 2^16 in each direction.
<Figure size 864x1008 with 1 Axes>

Plotting dataframe raises error of ordinal value must be >= 1

I follow the tutorial at http://nbviewer.ipython.org/github/jvns/pandas-cookbook/blob/v0.1/cookbook/Chapter%205%20-%20Combining%20dataframes%20and%20scraping%20Canadian%20weather%20data.ipynb
I have a pandas dataframe
weather_mar2012['Temp (°C)']
Out[30]:
Date/Time
2012-03-01 00:00:00 -5.5
2012-03-01 01:00:00 -5.7
2012-03-01 02:00:00 -5.4
When trying to plot it i get an error
weather_mar2012['Temp (°C)'].plot(figsize=(15, 5))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last) <ipython-input-31-21c79ba7d5ef> in <module>()
----> 1 weather_mar2012['Temp (°C)'].plot(figsize=(15, 5))
/home/vagrant/anaconda3/lib/python3.4/site-packages/pandas/tools/plotting.py in plot_series(data, kind, ax, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, label, secondary_y, **kwds) 2486 yerr=yerr, xerr=xerr, 2487 label=label, secondary_y=secondary_y,
-> 2488 **kwds) 2489 2490
/home/vagrant/anaconda3/lib/python3.4/site-packages/pandas/tools/plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds) 2292 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) 2293
-> 2294 plot_obj.generate() 2295 plot_obj.draw() 2296 return plot_obj.result
/home/vagrant/anaconda3/lib/python3.4/site-packages/pandas/tools/plotting.py in generate(self)
922 self._make_legend()
923 self._post_plot_logic()
--> 924 self._adorn_subplots()
925
926 def _args_adjust(self):
/home/vagrant/anaconda3/lib/python3.4/site-packages/pandas/tools/plotting.py in _adorn_subplots(self) 1052 ax.set_xticklabels(xticklabels) 1053 self._apply_axis_properties(ax.xaxis, rot=self.rot,
-> 1054 fontsize=self.fontsize) 1055 elif self.orientation == 'horizontal': 1056 if self._need_to_set_index:
/home/vagrant/anaconda3/lib/python3.4/site-packages/pandas/tools/plotting.py in _apply_axis_properties(self, axis, rot, fontsize) 1061 1062 def _apply_axis_properties(self, axis, rot=None, fontsize=None):
-> 1063 labels = axis.get_majorticklabels() + axis.get_minorticklabels() 1064 for label in labels: 1065 if rot is not None:
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/axis.py in get_majorticklabels(self) 1155 def get_majorticklabels(self): 1156 'Return a list of Text instances for the major ticklabels'
-> 1157 ticks = self.get_major_ticks() 1158 labels1 = [tick.label1 for tick in ticks if tick.label1On] 1159 labels2 = [tick.label2 for tick in ticks if tick.label2On]
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/axis.py in get_major_ticks(self, numticks) 1284 'get the tick instances; grow as necessary' 1285 if numticks is None:
-> 1286 numticks = len(self.get_major_locator()()) 1287 if len(self.majorTicks) < numticks: 1288 # update the new tick label properties from the old
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/dates.py in __call__(self)
863 def __call__(self):
864 'Return the locations of the ticks'
--> 865 self.refresh()
866 return self._locator()
867
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/dates.py in refresh(self)
880 def refresh(self):
881 'Refresh internal information based on current limits.'
--> 882 dmin, dmax = self.viewlim_to_dt()
883 self._locator = self.get_locator(dmin, dmax)
884
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/dates.py in viewlim_to_dt(self)
624 def viewlim_to_dt(self):
625 vmin, vmax = self.axis.get_view_interval()
--> 626 return num2date(vmin, self.tz), num2date(vmax, self.tz)
627
628 def _get_unit(self):
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/dates.py in num2date(x, tz)
343 tz = _get_rc_timezone()
344 if not cbook.iterable(x):
--> 345 return _from_ordinalf(x, tz)
346 else:
347 x = np.asarray(x)
/home/vagrant/anaconda3/lib/python3.4/site-packages/matplotlib/dates.py in _from_ordinalf(x, tz)
223 tz = _get_rc_timezone()
224 ix = int(x)
--> 225 dt = datetime.datetime.fromordinal(ix)
226 remainder = float(x) - ix
227 hour, remainder = divmod(24 * remainder, 1)
ValueError: ordinal must be >= 1
What does it mean?
How can i fix this?
I was getting this error in ipython even with current pandas 0.20.3
Traced it down to having run a script beforehand which saved a figure with a different index, but hadn't done plt.show() as the figure had been saved and I didn't need to see it.
So as #naught101 hinted, forcing plt.close('all') before showing the next figure fixes the issue. Probably good practice at the end of scripts anyway.
This was a bug in pandas: 0.18.1 and fixed in 0.19.2, eg run conda upgrade pandas

Why KeyError while plottin a pandas data frame with matplotlib? [duplicate]

This question already has answers here:
KeyError when plotting a sliced pandas dataframe with datetimes
(3 answers)
Closed 7 years ago.
I have this data frame:
date_obj col1 col2 col3 col4
40038 2012-11-19 1.000 0.831856 0.986209 0.843919
40039 2012-11-20 2.015 0.521764 1.177320 0.938245
40040 2012-11-21 1.160 1.645345 1.964620 4.536440
40041 2012-11-22 3.171 2.444018 2.931550 3.737840
40042 2012-11-23 4.563 3.208111 3.587250 2.434040
40043 2012-11-24 5.379 3.863732 3.824540 1.634780
40044 2012-11-26 1.125 20.756739 4.162820 23.552100
40045 2012-11-27 3.340 5.369354 4.535090 1.129290
40046 2012-11-28 5.463 12.185730 8.102790 1.224300
40047 2012-11-29 6.596 14.328685 9.271000 24.655600
40048 2012-11-30 31.544 13.513497 12.103400 21.273500
40049 2012-12-01 24.921 26.144050 16.256200 13.883100
40050 2012-12-03 5.488 2.581351 7.220790 3.349450
40051 2012-12-04 6.977 5.893819 5.548870 2.948770
40052 2012-12-05 7.115 6.533022 5.863820 2.517030
40053 2012-12-06 5.842 8.754232 7.518660 1.447940
40054 2012-12-07 6.346 12.018631 10.263100 11.837400
40055 2012-12-08 17.666 4.548846 10.610400 11.110800
40056 2012-12-10 4.300 2.823566 1.475000 1.989210
40057 2012-12-11 2.415 2.436319 2.677440 2.908270
40058 2012-12-12 2.319 2.121092 3.455550 3.890480
40059 2012-12-13 1.000 1.633918 3.858540 4.316940
40060 2012-12-14 2.238 1.688475 5.065990 5.267850
40061 2012-12-15 1.798 2.621267 7.175370 6.957340
I try to plot it in the following way:
plt.figure(figsize=(17, 10))
plt.setp(plt.xticks()[1], rotation=45)
plt.plot_date(df_cut['date_obj'],df_cut['col1'], color='black', linestyle='-', markersize=3, linewidth=2)
plt.plot_date(df_cut['date_obj'],df_cut['col2'], color='red', linestyle='-', markersize=3)
plt.plot_date(df_cut['date_obj'],df_cut['col3'], color='green', linestyle='-', markersize=3)
plt.plot_date(df_cut['date_obj'],df_cut['col4'], color='blue', linestyle='-', markersize=3)
As a result I get an error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-544-1b8650d1e7e7> in <module>()
/ipython/local/lib/python2.7/site-packages/matplotlib/pyplot.pyc in plot_date(x, y, fmt, tz, xdate, ydate, hold, **kwargs)
2850 try:
2851 ret = ax.plot_date(x, y, fmt=fmt, tz=tz, xdate=xdate, ydate=ydate,
-> 2852 **kwargs)
2853 draw_if_interactive()
2854 finally:
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in plot_date(self, x, y, fmt, tz, xdate, ydate, **kwargs)
4061 if not self._hold: self.cla()
4062
-> 4063 ret = self.plot(x, y, fmt, **kwargs)
4064
4065 if xdate:
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in plot(self, *args, **kwargs)
3994 lines = []
3995
-> 3996 for line in self._get_lines(*args, **kwargs):
3997 self.add_line(line)
3998 lines.append(line)
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _grab_next_args(self, *args, **kwargs)
328 return
329 if len(remaining) <= 3:
--> 330 for seg in self._plot_args(remaining, kwargs):
331 yield seg
332 return
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _plot_args(self, tup, kwargs)
306 x = np.arange(y.shape[0], dtype=float)
307
--> 308 x, y = self._xy_from_xy(x, y)
309
310 if self.command == 'plot':
python/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _xy_from_xy(self, x, y)
222 def _xy_from_xy(self, x, y):
223 if self.axes.xaxis is not None and self.axes.yaxis is not None:
--> 224 bx = self.axes.xaxis.update_units(x)
225 by = self.axes.yaxis.update_units(y)
226
ipython/local/lib/python2.7/site-packages/matplotlib/axis.pyc in update_units(self, data)
1299 neednew = self.converter != converter
1300 self.converter = converter
-> 1301 default = self.converter.default_units(data, self)
1302 #print 'update units: default=%s, units=%s'%(default, self.units)
1303 if default is not None and self.units is None:
ipython/local/lib/python2.7/site-packages/matplotlib/dates.pyc in default_units(x, axis)
1156 'Return the tzinfo instance of *x* or of its first element, or None'
1157 try:
-> 1158 x = x[0]
1159 except (TypeError, IndexError):
1160 pass
ipython/local/lib/python2.7/site-packages/pandas/core/series.pyc in __getitem__(self, key)
611 def __getitem__(self, key):
612 try:
--> 613 return self.index.get_value(self, key)
614 except InvalidIndexError:
615 pass
ipython/local/lib/python2.7/site-packages/pandas/core/index.pyc in get_value(self, series, key)
761 """
762 try:
--> 763 return self._engine.get_value(series, key)
764 except KeyError, e1:
765 if len(self) > 0 and self.inferred_type == 'integer':
What is strange, this code works for some data frames and for some it doesn't. The data frames are not different by their structure. The only difference between them is only in values that they contain.
Could anybody please help me to resolve this problem?
Dataframe store dates as numpy.datetime64 objects, not python datetime objects.
Furthermore matplotlib.plot_date uses its own numeric representation of dates.
You could draw your data this way:
plt.plot_date(matplotlib.dates.date2num(pandas.to_datetime(df_cut['date_obj'].values)),df_cut['col1'].values, color='black', linestyle='-', markersize=3, linewidth=2)
Or you could define column 'date_obj' as the index of your data:
df0 = pd.DataFrame.from_records(YourDataSource, columns=['date_obj','col1','col2','col3','col4'],index='date_obj')
And then simply use pandas' plot() attribute:
df0['col1'].plot()

Bar chart in pandas on time series data

I am trying to do bar chart in pandas on time series data.
Documentation says it is not possible: http://pandas.pydata.org/pandas-docs/stable/visualization.html#bar-plots
Is there some workaround ?
This is my code
# there must be ORDER BY, other wise rows will not be ordered
df = sql.read_frame("SELECT * FROM hzmo_report ORDER BY datum;", cnx, index_col='datum')
df.index = pd.to_datetime(df.index) # converting to DatetimeIndex
df['korisnika'].plot(ax=axs1[0], title='SOMETHING', marker='o')
df['korisnika'].diff().plot(ax=axs1[1], title='SOMETHING', marker='o') # i would like this to be bar plot
If I do
df['korisnika'].diff().plot(kind='bar', ax=axs1[1], title='SOMETHING', marker='o')
I have just added kind='bar'
I get:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-109-d41eb2b2e3a7> in <module>()
36 fig1.suptitle('Umirovljenici', fontsize=16)
37 df['korisnika'].plot(ax=axs1[0], title='Broj korisnika mirovine', marker='o')
---> 38 ( df['korisnika'].diff() ).plot(ax=axs1[1], kind='bar', title='Apsolutna razlika naspram prethodnog mjeseca', marker='o')
39 #df['korisnika'].diff().hist()
40
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\pandas\tools\plotting.pyc in plot_series(series, label, kind, use_index, rot, xticks, yticks, xlim, ylim, ax, style, grid, legend, logy, secondary_y, **kwds)
1504 secondary_y=secondary_y, **kwds)
1505
-> 1506 plot_obj.generate()
1507 plot_obj.draw()
1508
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\pandas\tools\plotting.pyc in generate(self)
731 self._compute_plot_data()
732 self._setup_subplots()
--> 733 self._make_plot()
734 self._post_plot_logic()
735 self._adorn_subplots()
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\pandas\tools\plotting.pyc in _make_plot(self)
1291 else:
1292 rect = bar_f(ax, self.ax_pos + i * 0.75 / K, y, 0.75 / K,
-> 1293 start=pos_prior, label=label, **kwds)
1294 rects.append(rect)
1295 labels.append(label)
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\pandas\tools\plotting.pyc in f(ax, x, y, w, start, **kwds)
1251 if self.kind == 'bar':
1252 def f(ax, x, y, w, start=None, **kwds):
-> 1253 return ax.bar(x, y, w, bottom=start, **kwds)
1254 elif self.kind == 'barh':
1255 def f(ax, x, y, w, start=None, **kwds):
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\matplotlib\axes.pyc in bar(self, left, height, width, bottom, **kwargs)
4779 label='_nolegend_'
4780 )
-> 4781 r.update(kwargs)
4782 r.get_path()._interpolation_steps = 100
4783 #print r.get_label(), label, 'label' in kwargs
C:\Documents and Settings\hr1ub098\Application Data\Python\Python27\site-packages\matplotlib\artist.pyc in update(self, props)
657 func = getattr(self, 'set_'+k, None)
658 if func is None or not callable(func):
--> 659 raise AttributeError('Unknown property %s'%k)
660 func(v)
661 changed = True
AttributeError: Unknown property marker
You can plot a bar-plot of a time-series. Not that useful IMHO though.
ts = Series(randn(20),date_range('20130101',periods=20))
ts.plot()
A time-series line-plot
A Bar Plot

Categories