Adding legend to matplotlib scatterplot - python

I am following the Randy Olson approach to make beautiful time trends graphs (see here).
When I run the following code:
tableau20 = [(31, 119, 180), (174, 199, 232)]
for i in range(len(tableau20)):
r, g, b = tableau20[i]
tableau20[i] = (r / 255., g / 255., b / 255.)
plt.figure(figsize=(12, 14))
# Remove the plot frame lines. They are unnecessary chartjunk.
ax = plt.subplot(111)
ax.spines["top"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["left"].set_visible(False)
# Ensure that the axis ticks only show up on the bottom and left of the plot.
# Ticks on the right and top of the plot are generally unnecessary chartjunk.
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
majors = ['Number of findings (total)', 'Business Ethics Findings']
for rank, column in enumerate(majors):
plt.plot(monthly_df.argrc__End_Date__c.values, monthly_df[column.replace("\n", " ")].values,
lw=2.5, color=tableau20[rank])
y_pos = monthly_df[column.replace("\n", " ")].values[-1] - 0.5
if column == "Number of findings (total)":
y_pos += 0.5
elif column == 'Business Ethics Findings':
y_pos -= 0.5
plt.text(2018.1, y_pos, column, fontsize=12, color=tableau20[rank])
I get this error:
If I eliminate
plt.text(2016, y_pos, column, fontsize=12, color=tableau20[rank])
instead, I get the two lines plotted on the graph correctly, but with no legend. How do I show the names of my columns?
EDIT
I am adding here the traceback for further information. I hope this is helpful.
C:\Users\filippo.sebastio\Anaconda3\lib\site-packages\matplotlib\cbook\deprecation.py:107: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance. In a future version, a new instance will always be created and returned. Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.
warnings.warn(message, mplDeprecation, stacklevel=1)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
339 pass
340 else:
--> 341 return printer(obj)
342 # Finally look for special method names
343 method = get_real_method(obj, self.print_method)
~\Anaconda3\lib\site-packages\IPython\core\pylabtools.py in <lambda>(fig)
239
240 if 'png' in formats:
--> 241 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
242 if 'retina' in formats or 'png2x' in formats:
243 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
~\Anaconda3\lib\site-packages\IPython\core\pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
123
124 bytes_io = BytesIO()
--> 125 fig.canvas.print_figure(bytes_io, **kw)
126 data = bytes_io.getvalue()
127 if fmt == 'svg':
~\Anaconda3\lib\site-packages\matplotlib\backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs)
2261 orientation=orientation,
2262 bbox_inches_restore=_bbox_inches_restore,
-> 2263 **kwargs)
2264 finally:
2265 if bbox_inches and restore_bbox:
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in print_png(self, filename_or_obj, *args, **kwargs)
515
516 def print_png(self, filename_or_obj, *args, **kwargs):
--> 517 FigureCanvasAgg.draw(self)
518 renderer = self.get_renderer()
519 original_dpi = renderer.dpi
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in draw(self)
427 Draw the figure using the renderer
428 """
--> 429 self.renderer = self.get_renderer(cleared=True)
430 # acquire a lock on the shared font cache
431 RendererAgg.lock.acquire()
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in get_renderer(self, cleared)
452
453 if need_new_renderer:
--> 454 self.renderer = RendererAgg(w, h, self.figure.dpi)
455 self._lastKey = key
456 elif cleared:
~\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in __init__(self, width, height, dpi)
99 self.width = width
100 self.height = height
--> 101 self._renderer = _RendererAgg(int(width), int(height), dpi)
102 self._filter_renderers = []
103
ValueError: Image size of 312943x821 pixels is too large. It must be less than 2^16 in each direction.
<Figure size 864x1008 with 1 Axes>

Related

NonUniformImage: numpy example gives 'cannot unpack non-iterable NoneType object' error 2D-Histogram

I'm trying to run this very simple example from numpy page regarding histogram2d:
https://numpy.org/doc/stable/reference/generated/numpy.histogram2d.html.
from matplotlib.image import NonUniformImage
import matplotlib.pyplot as plt
xedges = [0, 1, 3, 5]
yedges = [0, 2, 3, 4, 6]
x = np.random.normal(2, 1, 100)
y = np.random.normal(1, 1, 100)
H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges))
H = H.T
fig = plt.figure(figsize=(7, 3))
ax = fig.add_subplot(131, title='imshow: square bins')
plt.imshow(H, interpolation='nearest', origin='lower',extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
ax = fig.add_subplot(132, title='pcolormesh: actual edges',aspect='equal')
X, Y = np.meshgrid(xedges, yedges)
ax.pcolormesh(X, Y, H)
ax = fig.add_subplot(133, title='NonUniformImage: interpolated',aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]])
im = NonUniformImage(ax, interpolation='bilinear')
xcenters = (xedges[:-1] + xedges[1:]) / 2
ycenters = (yedges[:-1] + yedges[1:]) / 2
im.set_data(xcenters,ycenters,H)
ax.images.append(im)
plt.show()
By running this code as in the example, I receive the error
cannot unpack non-iterable NoneType object
This happens as soon as I run the line ax.images.append(im).
Does anyone know why this happens?
I tried to run an example from the NumPy website and it doesn't work as expected.
The full error message is:
TypeError Traceback (most recent call last)
File ~\anaconda3\lib\site-packages\IPython\core\formatters.py:339, in BaseFormatter.__call__(self, obj)
337 pass
338 else:
--> 339 return printer(obj)
340 # Finally look for special method names
341 method = get_real_method(obj, self.print_method)
File ~\anaconda3\lib\site-packages\IPython\core\pylabtools.py:151, in print_figure(fig, fmt, bbox_inches, base64, **kwargs)
148 from matplotlib.backend_bases import FigureCanvasBase
149 FigureCanvasBase(fig)
--> 151 fig.canvas.print_figure(bytes_io, **kw)
152 data = bytes_io.getvalue()
153 if fmt == 'svg':
File ~\anaconda3\lib\site-packages\matplotlib\backend_bases.py:2299, in FigureCanvasBase.print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
2297 if bbox_inches:
2298 if bbox_inches == "tight":
-> 2299 bbox_inches = self.figure.get_tightbbox(
2300 renderer, bbox_extra_artists=bbox_extra_artists)
2301 if pad_inches is None:
2302 pad_inches = rcParams['savefig.pad_inches']
File ~\anaconda3\lib\site-packages\matplotlib\figure.py:1632, in FigureBase.get_tightbbox(self, renderer, bbox_extra_artists)
1629 artists = bbox_extra_artists
1631 for a in artists:
-> 1632 bbox = a.get_tightbbox(renderer)
1633 if bbox is not None and (bbox.width != 0 or bbox.height != 0):
1634 bb.append(bbox)
File ~\anaconda3\lib\site-packages\matplotlib\axes\_base.py:4666, in _AxesBase.get_tightbbox(self, renderer, call_axes_locator, bbox_extra_artists, for_layout_only)
4662 if np.all(clip_extent.extents == axbbox.extents):
4663 # clip extent is inside the Axes bbox so don't check
4664 # this artist
4665 continue
-> 4666 bbox = a.get_tightbbox(renderer)
4667 if (bbox is not None
4668 and 0 < bbox.width < np.inf
4669 and 0 < bbox.height < np.inf):
4670 bb.append(bbox)
File ~\anaconda3\lib\site-packages\matplotlib\artist.py:355, in Artist.get_tightbbox(self, renderer)
340 def get_tightbbox(self, renderer):
341 """
342 Like `.Artist.get_window_extent`, but includes any clipping.
343
(...)
353 The enclosing bounding box (in figure pixel coordinates).
354 """
--> 355 bbox = self.get_window_extent(renderer)
356 if self.get_clip_on():
357 clip_box = self.get_clip_box()
File ~\anaconda3\lib\site-packages\matplotlib\image.py:943, in AxesImage.get_window_extent(self, renderer)
942 def get_window_extent(self, renderer=None):
--> 943 x0, x1, y0, y1 = self._extent
944 bbox = Bbox.from_extents([x0, y0, x1, y1])
945 return bbox.transformed(self.axes.transData)
TypeError: cannot unpack non-iterable NoneType object
<Figure size 504x216 with 3 Axes>
The error occurs deep in the append call, and appears to involve trying to get information about the plot window. If I comment out the append line, it continues on to plt.show(), and the resulting image looks like the example, except that the third image is blank.
I tested this in a Windows QtConsole; I don't know whether that context poses problems for this append or not. I don't think it's a problem with your copy of the code.

"KeyError: ('f', None)" when trying to plot legend with some points

I'm trying to plot two individual points on a hexbin plot with colors, markers, and labels. When I try to include a legend, I get a KeyError. Note that I'm doing the same thing with two different sets of data, and it works fine with one set and doesn't work with the other.
I first tried using plt.plot, then with plt.scatter. I tried using different colors. I tried explicitly stating the color and marker. I can't find anything online that describes the same issue.
fig=plt.figure(figsize=(10,7))
plt.hexbin(x,y)
plt.colorbar()
plt.scatter(x1,y1,c='w',marker='*',label='Field 1')
plt.scatter(x2,y2,c='w',marker='^',label='Field 2')
plt.legend(loc='lower right')
plt.show()
For my other data, this plots just fine. But here, I get an error:
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\matplotlib\colors.py in to_rgba(c, alpha)
165 try:
--> 166 rgba = _colors_full_map.cache[c, alpha]
167 except (KeyError, TypeError): # Not in cache, or unhashable.
KeyError: ('f', None)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-33-69334a5c3996> in <module>()
21 plt.scatter(zb,np.log(massb),c='w',marker='*',label='Field 1')
22 plt.scatter(za,np.log(massa),c='w',marker='^',label='Field 2')
---> 23 plt.legend(loc='lower right')
24 #plt.title('Stellar mass vs sSFR')
25 plt.show()
~\Anaconda3\lib\site-packages\matplotlib\pyplot.py in legend(*args, **kwargs)
3821 #docstring.copy_dedent(Axes.legend)
3822 def legend(*args, **kwargs):
-> 3823 ret = gca().legend(*args, **kwargs)
3824 return ret
3825
~\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in legend(self, *args, **kwargs)
555 if len(extra_args):
556 raise TypeError('legend only accepts two non-keyword arguments')
--> 557 self.legend_ = mlegend.Legend(self, handles, labels, **kwargs)
558 self.legend_._remove_method = lambda h: setattr(self, 'legend_', None)
559 return self.legend_
~\Anaconda3\lib\site-packages\matplotlib\legend.py in __init__(self, parent, handles, labels, loc, numpoints, markerscale, markerfirst, scatterpoints, scatteryoffsets, prop, fontsize, borderpad, labelspacing, handlelength, handleheight, handletextpad, borderaxespad, columnspacing, ncol, mode, fancybox, shadow, title, framealpha, edgecolor, facecolor, bbox_to_anchor, bbox_transform, frameon, handler_map)
697
698 # init with null renderer
--> 699 self._init_legend_box(handles, labels, markerfirst)
700
701 # If shadow is activated use framealpha if not
~\Anaconda3\lib\site-packages\matplotlib\legend.py in _init_legend_box(self, handles, labels, markerfirst)
952 # original artist/handle.
953 handle_list.append(handler.legend_artist(self, orig_handle,
--> 954 fontsize, handlebox))
955 handles_and_labels.append((handlebox, textbox))
956
~\Anaconda3\lib\site-packages\matplotlib\legend_handler.py in legend_artist(self, legend, orig_handle, fontsize, handlebox)
117 artists = self.create_artists(legend, orig_handle,
118 xdescent, ydescent, width, height,
--> 119 fontsize, handlebox.get_transform())
120
121 # create_artists will return a list of artists.
~\Anaconda3\lib\site-packages\matplotlib\legend_handler.py in create_artists(self, legend, orig_handle, xdescent, ydescent, width, height, fontsize, trans)
726 p = Rectangle(xy=(-xdescent, -ydescent),
727 width=width, height=height)
--> 728 self.update_prop(p, orig_handle, legend)
729 p.set_transform(trans)
730 return [p]
~\Anaconda3\lib\site-packages\matplotlib\legend_handler.py in update_prop(self, legend_handle, orig_handle, legend)
74 def update_prop(self, legend_handle, orig_handle, legend):
75
---> 76 self._update_prop(legend_handle, orig_handle)
77
78 legend._set_artist_props(legend_handle)
~\Anaconda3\lib\site-packages\matplotlib\legend_handler.py in _update_prop(self, legend_handle, orig_handle)
710 edgecolor = getattr(orig_handle, '_original_edgecolor',
711 orig_handle.get_edgecolor())
--> 712 legend_handle.set_edgecolor(first_color(edgecolor))
713 facecolor = getattr(orig_handle, '_original_facecolor',
714 orig_handle.get_facecolor())
~\Anaconda3\lib\site-packages\matplotlib\legend_handler.py in first_color(colors)
697 if colors is None:
698 return None
--> 699 colors = mcolors.to_rgba_array(colors)
700 if len(colors):
701 return colors[0]
~\Anaconda3\lib\site-packages\matplotlib\colors.py in to_rgba_array(c, alpha)
265 result = np.empty((len(c), 4), float)
266 for i, cc in enumerate(c):
--> 267 result[i] = to_rgba(cc, alpha)
268 return result
269
~\Anaconda3\lib\site-packages\matplotlib\colors.py in to_rgba(c, alpha)
166 rgba = _colors_full_map.cache[c, alpha]
167 except (KeyError, TypeError): # Not in cache, or unhashable.
--> 168 rgba = _to_rgba_no_colorcycle(c, alpha)
169 try:
170 _colors_full_map.cache[c, alpha] = rgba
~\Anaconda3\lib\site-packages\matplotlib\colors.py in _to_rgba_no_colorcycle(c, alpha)
210 except ValueError:
211 pass
--> 212 raise ValueError("Invalid RGBA argument: {!r}".format(orig_c))
213 # tuple color.
214 c = np.array(c)
ValueError: Invalid RGBA argument: 'f'
I have no idea what 'f' is or where it's coming from. I don't see why using the basic matplotlib colors isn't working.
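This has something to do with the way the PolyCollection legend handler sets 'face' as the edgecolor, not expecting it to be parsed as an RGBA array (the 'f' in the error message appears to be the first character of the string 'face', which is being iterated over as if it were a sequence of colors).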
Place your x,y data in a DataFrame and create the hexbin with pandas:
plt.hexbin(x,y)
...
plt.scatter(x1,y1,c='w',marker='*',label='Field 1')
...
plt.legend(loc='lower right')
...won't work, but:
df.plot.hexbin('x','y',ax=plt.gca())
...
plt.scatter(x1,y1,c='w',marker='*',label='Field 1')
...
plt.legend(loc='lower right')
...will do the trick.

matplotlib how to plot multiple lines with defined colours?

I get an error when I plot with colours, but it works fine without them. My line colours need to be restricted to 2 defined values.
This works in Jupyter Notebook
import random
xStart = random.sample(range(1, 10), 6)
xStart.sort()
xEnd = [x + random.randint(1, 6) for x in xStart]
yval = list(range(1, 7))
colours = ['r']*6
colours[1] = 'b'
print(xStart)
print(xEnd)
print(yval)
print(colours)
f, ax1 = plt.subplots(figsize=(6,4))
ax1.plot([xStart,xEnd], [yval,yval], '-', linewidth=1) #, color=colours)
plt.show()
This does not work.
If I uncomment the color argument, the code throws an (elaborate) error. While I can draw each line segment in a loop and colour each red or blue, I assume it will be slower than the below code. In this toy example I have 6 lines, but in reality I have 12,000 lines and it takes a few minutes, drawing one line at a time in a loop.
I think the error is related to the size of my colour argument; it is likely expecting 1 (at a time internally) whereas I am providing a list of 6.
import random
xStart = random.sample(range(1, 10), 6)
xStart.sort()
xEnd = [x + random.randint(1, 6) for x in xStart]
yval = list(range(1, 7))
colours = ['r']*6
colours[1] = 'b'
print(xStart)
print(xEnd)
print(yval)
print(colours)
f, ax1 = plt.subplots(figsize=(6,4))
ax1.plot([xStart,xEnd], [yval,yval], '-', linewidth=1, color=colours) #--> Only change from above code
plt.show()
TypeError Traceback (most recent call
last) C:\Anaconda3\lib\site-packages\matplotlib\colors.py in
to_rgba(c, alpha)
131 try:
--> 132 rgba = _colors_full_map.cache[c, alpha]
133 except (KeyError, TypeError): # Not in cache, or unhashable.
TypeError: unhashable type: 'list'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call
last) C:\Anaconda3\lib\site-packages\IPython\core\formatters.py in
call(self, obj)
339 pass
340 else:
--> 341 return printer(obj)
342 # Finally look for special method names
343 method = get_real_method(obj, self.print_method)
C:\Anaconda3\lib\site-packages\IPython\core\pylabtools.py in
(fig)
236
237 if 'png' in formats:
--> 238 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
239 if 'retina' in formats or 'png2x' in formats:
240 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
C:\Anaconda3\lib\site-packages\IPython\core\pylabtools.py in
print_figure(fig, fmt, bbox_inches, **kwargs)
120
121 bytes_io = BytesIO()
--> 122 fig.canvas.print_figure(bytes_io, **kw)
123 data = bytes_io.getvalue()
124 if fmt == 'svg':
C:\Anaconda3\lib\site-packages\matplotlib\backend_bases.py in
print_figure(self, filename, dpi, facecolor, edgecolor, orientation,
format, **kwargs) 2214 orientation=orientation,
2215 dryrun=True,
-> 2216 **kwargs) 2217 renderer = self.figure._cachedRenderer 2218 bbox_inches = self.figure.get_tightbbox(renderer)
C:\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in
print_png(self, filename_or_obj, *args, **kwargs)
505
506 def print_png(self, filename_or_obj, *args, **kwargs):
--> 507 FigureCanvasAgg.draw(self)
508 renderer = self.get_renderer()
509 original_dpi = renderer.dpi
C:\Anaconda3\lib\site-packages\matplotlib\backends\backend_agg.py in
draw(self)
428 # if toolbar:
429 # toolbar.set_cursor(cursors.WAIT)
--> 430 self.figure.draw(self.renderer)
431 finally:
432 # if toolbar:
C:\Anaconda3\lib\site-packages\matplotlib\artist.py in
draw_wrapper(artist, renderer, *args, **kwargs)
53 renderer.start_filter()
54
---> 55 return draw(artist, renderer, *args, **kwargs)
56 finally:
57 if artist.get_agg_filter() is not None:
C:\Anaconda3\lib\site-packages\matplotlib\figure.py in draw(self,
renderer) 1297 1298
mimage._draw_list_compositing_images(
-> 1299 renderer, self, artists, self.suppressComposite) 1300 1301
renderer.close_group('figure')
C:\Anaconda3\lib\site-packages\matplotlib\image.py in
_draw_list_compositing_images(renderer, parent, artists, suppress_composite)
136 if not_composite or not has_images:
137 for a in artists:
--> 138 a.draw(renderer)
139 else:
140 # Composite any adjacent images together
C:\Anaconda3\lib\site-packages\matplotlib\artist.py in
draw_wrapper(artist, renderer, *args, **kwargs)
53 renderer.start_filter()
54
---> 55 return draw(artist, renderer, *args, **kwargs)
56 finally:
57 if artist.get_agg_filter() is not None:
C:\Anaconda3\lib\site-packages\matplotlib\axes_base.py in draw(self,
renderer, inframe) 2435 renderer.stop_rasterizing()
2436
-> 2437 mimage._draw_list_compositing_images(renderer, self, artists) 2438 2439 renderer.close_group('axes')
C:\Anaconda3\lib\site-packages\matplotlib\image.py in
_draw_list_compositing_images(renderer, parent, artists, suppress_composite)
136 if not_composite or not has_images:
137 for a in artists:
--> 138 a.draw(renderer)
139 else:
140 # Composite any adjacent images together
C:\Anaconda3\lib\site-packages\matplotlib\artist.py in
draw_wrapper(artist, renderer, *args, **kwargs)
53 renderer.start_filter()
54
---> 55 return draw(artist, renderer, *args, **kwargs)
56 finally:
57 if artist.get_agg_filter() is not None:
C:\Anaconda3\lib\site-packages\matplotlib\lines.py in draw(self,
renderer)
765 self._set_gc_clip(gc)
766
--> 767 ln_color_rgba = self._get_rgba_ln_color()
768 gc.set_foreground(ln_color_rgba, isRGBA=True)
769 gc.set_alpha(ln_color_rgba[3])
C:\Anaconda3\lib\site-packages\matplotlib\lines.py in
_get_rgba_ln_color(self, alt) 1267 1268 def _get_rgba_ln_color(self, alt=False):
-> 1269 return mcolors.to_rgba(self._color, self._alpha) 1270 1271 # some aliases....
C:\Anaconda3\lib\site-packages\matplotlib\colors.py in to_rgba(c,
alpha)
132 rgba = _colors_full_map.cache[c, alpha]
133 except (KeyError, TypeError): # Not in cache, or unhashable.
--> 134 rgba = _to_rgba_no_colorcycle(c, alpha)
135 try:
136 _colors_full_map.cache[c, alpha] = rgba
C:\Anaconda3\lib\site-packages\matplotlib\colors.py in
_to_rgba_no_colorcycle(c, alpha)
183 # float)andnp.array(...).astype(float)` all convert "0.5" to 0.5.
184 # Test dimensionality to reject single floats.
--> 185 raise ValueError("Invalid RGBA argument: {!r}".format(orig_c))
186 # Return a tuple to prevent the cached value from being modified.
187 c = tuple(c.astype(float))
ValueError: Invalid RGBA argument: ['r', 'b', 'r', 'r', 'r', 'r']
OK thanks to Bazingaa and this thread, How to get different colored lines for different plots in a single figure?
...the final code is as follows.
Since I am drawing multiple lines with one ax.plot() command, the colour argument will not accept a list of colours. IMHO it should, since that logically makes sense, and it would be a worthwhile enhancement for matplotlib. Nonetheless, here is the solution as Bazingaa pointed out to me.
For those interested, as expected this code does run a LOT faster compared with drawing 12K lines in a loop (in order to draw & color them one at a time with individual ax.plot() commands).
import random
xStart = random.sample(range(1, 10), 6)
xStart.sort()
xEnd = [x + random.randint(1, 6) for x in xStart]
yval = list(range(1, 7))
colours = ['r']*6
colours[1] = 'b'
f, ax1 = plt.subplots(figsize=(6,4))
ax1.plot([xStart,xEnd], [yval,yval], '-', linewidth=1) #, color=colours) #Leaving the color argument commented
#Add new code to colour after the fact
for idx,line in enumerate(ax1.lines):
line.set_color(colours[idx])
plt.show()

Jupyter Seaborn sns.lmplot Error

I am having trouble while trying to plot a lmplot in Jupyter.
Below you can see the first rows of my data; the rest of the data follows the same pattern.
Year_of_Release Platform_General Platform counts Global_Sales(M#) GS_Amount/Game
1994.0 Sony_Playstation PS 1 1.27 1.270000
1996.0 Sony_Playstation PS 5 17.48 3.496000
1997.0 Sony_Playstation PS 12 30.89 2.574167
1998.0 Sony_Playstation PS 22 43.08 1.958182
1999.0 Sony_Playstation PS 25 49.02 1.960800
Below is the code that should draw the plot, but it raises an error instead:
# Make a custom palette with platform colors
pal = dict(Sony_Playstation="#6495ED",Microsoft_Xbox="#F08080",Nintendo="Green")
# Show the survival probability as a function of platforms
g = sns.lmplot(x="Year_of_Release", y="GS_Amount/Game", col="Platform_General", hue="Platform_General",
data=vgs_df_pf_grouped,palette=pal, y_jitter=.02, logistic=True)
# Use more informative axis labels than are provided by default
g.set_axis_labels("Year of Release", "Sales per Game Released (M #)")
When I run this code, the long error message below appears, and because I am new to data analytics I can't understand what is wrong with the data or the code.
I would appreciate some help with this one. Thank you.
ValueError Traceback (most recent call last)
<ipython-input-68-c78b6ba34d96> in <module>()
5 # Show the survival proability as a function of age and sex
6 g = sns.lmplot(x="Year_of_Release", y="GS_Amount/Game",col="Platform_General", hue="Platform_General",
----> 7 data=vgs_df_pf_grouped,palette=pal, y_jitter=.02, logistic=True)
8
9 # Use more informative axis labels than are provided by default
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in lmplot(x, y, data, hue, col, row, palette, col_wrap, size, aspect, markers, sharex, sharey, hue_order, col_order, row_order, legend, legend_out, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, x_jitter, y_jitter, scatter_kws, line_kws)
588 scatter_kws=scatter_kws, line_kws=line_kws,
589 )
--> 590 facets.map_dataframe(regplot, x, y, **regplot_kws)
591
592 # Add a legend
~/anaconda3/lib/python3.6/site-packages/seaborn/axisgrid.py in map_dataframe(self, func, *args, **kwargs)
795
796 # Draw the plot
--> 797 self._facet_plot(func, ax, args, kwargs)
798
799 # Finalize the annotations and layout
~/anaconda3/lib/python3.6/site-packages/seaborn/axisgrid.py in _facet_plot(self, func, ax, plot_args, plot_kwargs)
813
814 # Draw the plot
--> 815 func(*plot_args, **plot_kwargs)
816
817 # Sort out the supporting information
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in regplot(x, y, data, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, dropna, x_jitter, y_jitter, label, color, marker, scatter_kws, line_kws, ax)
788 scatter_kws["marker"] = marker
789 line_kws = {} if line_kws is None else copy.copy(line_kws)
--> 790 plotter.plot(ax, scatter_kws, line_kws)
791 return ax
792
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in plot(self, ax, scatter_kws, line_kws)
340 self.scatterplot(ax, scatter_kws)
341 if self.fit_reg:
--> 342 self.lineplot(ax, line_kws)
343
344 # Label the axes
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in lineplot(self, ax, kws)
385
386 # Fit the regression model
--> 387 grid, yhat, err_bands = self.fit_regression(ax)
388
389 # Get set default aesthetics
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in fit_regression(self, ax, x_range, grid)
198 from statsmodels.genmod.families import Binomial
199 yhat, yhat_boots = self.fit_statsmodels(grid, GLM,
--> 200 family=Binomial())
201 elif self.lowess:
202 ci = None
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in fit_statsmodels(self, grid, model, **kwargs)
258 return yhat
259
--> 260 yhat = reg_func(X, y)
261 if self.ci is None:
262 return yhat, None
~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in reg_func(_x, _y)
252 def reg_func(_x, _y):
253 try:
--> 254 yhat = model(_y, _x, **kwargs).fit().predict(grid)
255 except glm.PerfectSeparationError:
256 yhat = np.empty(len(grid))
~/anaconda3/lib/python3.6/site-packages/statsmodels/genmod/generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, full_output, disp, max_start_irls, **kwargs)
901 return self._fit_irls(start_params=start_params, maxiter=maxiter,
902 tol=tol, scale=scale, cov_type=cov_type,
--> 903 cov_kwds=cov_kwds, use_t=use_t, **kwargs)
904 else:
905 return self._fit_gradient(start_params=start_params,
~/anaconda3/lib/python3.6/site-packages/statsmodels/genmod/generalized_linear_model.py in _fit_irls(self, start_params, maxiter, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
977 dev = self.family.deviance(self.endog, mu, self.freq_weights)
978 if np.isnan(dev):
--> 979 raise ValueError("The first guess on the deviance function "
980 "returned a nan. This could be a boundary "
981 " problem and should be reported.")
ValueError: The first guess on the deviance function returned a nan. This could be a boundary problem and should be reported.

Memory error while plotting dataframe (matplotlib)

I'm using Pandas with Jupyter Notebook and trying to plot a small dataframe:
and when I run the following line:
df9.plot(x='Time', y='Pressure mean')
I'm getting the following error:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-8-c789b8162a1a> in <module>()
----> 1 df9.plot(x='Time', y='Pressure mean')
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
3735 fontsize=fontsize, colormap=colormap, table=table,
3736 yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 3737 sort_columns=sort_columns, **kwds)
3738 __call__.__doc__ = plot_frame.__doc__
3739
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2609 yerr=yerr, xerr=xerr,
2610 secondary_y=secondary_y, sort_columns=sort_columns,
-> 2611 **kwds)
2612
2613
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
2436 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
2437
-> 2438 plot_obj.generate()
2439 plot_obj.draw()
2440 return plot_obj.result
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in generate(self)
1029
1030 for ax in self.axes:
-> 1031 self._post_plot_logic_common(ax, self.data)
1032 self._post_plot_logic(ax, self.data)
1033
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _post_plot_logic_common(self, ax, data)
1157 ax.set_xticklabels(xticklabels)
1158 self._apply_axis_properties(ax.xaxis, rot=self.rot,
-> 1159 fontsize=self.fontsize)
1160 self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize)
1161 elif self.orientation == 'horizontal':
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _apply_axis_properties(self, axis, rot, fontsize)
1205
1206 def _apply_axis_properties(self, axis, rot=None, fontsize=None):
-> 1207 labels = axis.get_majorticklabels() + axis.get_minorticklabels()
1208 for label in labels:
1209 if rot is not None:
C:\Anaconda3\lib\site-packages\matplotlib\axis.py in get_majorticklabels(self)
1159 def get_majorticklabels(self):
1160 'Return a list of Text instances for the major ticklabels'
-> 1161 ticks = self.get_major_ticks()
1162 labels1 = [tick.label1 for tick in ticks if tick.label1On]
1163 labels2 = [tick.label2 for tick in ticks if tick.label2On]
C:\Anaconda3\lib\site-packages\matplotlib\axis.py in get_major_ticks(self, numticks)
1288 'get the tick instances; grow as necessary'
1289 if numticks is None:
-> 1290 numticks = len(self.get_major_locator()())
1291 if len(self.majorTicks) < numticks:
1292 # update the new tick label properties from the old
C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in __call__(self)
876 vmin, vmax = vmax, vmin
877 if self.isdynamic:
--> 878 locs = self._get_default_locs(vmin, vmax)
879 else: # pragma: no cover
880 base = self.base
C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in _get_default_locs(self, vmin, vmax)
857
858 if self.plot_obj.date_axis_info is None:
--> 859 self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq)
860
861 locator = self.plot_obj.date_axis_info
C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in _daily_finder(vmin, vmax, freq)
481 Period(ordinal=int(vmax), freq=freq))
482 span = vmax.ordinal - vmin.ordinal + 1
--> 483 dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq)
484 # Initialize the output
485 info = np.zeros(span,
C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in __new__(cls, data, ordinal, freq, start, end, periods, copy, name, tz, **kwargs)
186 else:
187 data, freq = cls._generate_range(start, end, periods,
--> 188 freq, kwargs)
189 else:
190 ordinal, freq = cls._from_arraylike(data, freq, tz)
C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in _generate_range(cls, start, end, periods, freq, fields)
200 raise ValueError('Can either instantiate from fields '
201 'or endpoints, but not both')
--> 202 subarr, freq = _get_ordinal_range(start, end, periods, freq)
203 elif field_count > 0:
204 subarr, freq = _range_from_fields(freq=freq, **fields)
C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in _get_ordinal_range(start, end, periods, freq, mult)
1026 dtype=np.int64)
1027 else:
-> 1028 data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
1029
1030 return data, freq
MemoryError:
What is the problem? I can't figure it out.
Thanks!
The issue originates from using TimedeltaIndex (or timedelta) for your time column. It was reported here: https://github.com/pydata/pandas/issues/8711
No fix has been provided yet.
As an alternative solution, I invite you to convert your data to DateTime or DateTimeIndex. Let's say YourDate contains the starting date of your observations.
df9.index = pd.DatetimeIndex(pd.datetime.strptime(YourDate,'%d.%m.%Y %H:%M:%S')
+df9['Time'])
df9.plot(y='Pressure mean')
Note that it will plot only the hours if you have less than 24 hours.
EDIT (2016-11-07):
I can now use timedelta as index and plot correctly. This is how I proceed (assuming I have float numbers indicating hours):
converter = {'Time[h]' : lambda x: pd.to_timedelta(float(x),unit='h')}#converts float to timedelta
df = pd.read_csv(fpath, sep='\t',
skiprows=len(comments),#header
names=dt.keys(),#you need of course your own dtype
dtype=dt,#you need of course your own dtype
encoding='latin-1',#European data...
skipinitialspace=True,
converters=converter)
df = df.set_index('Time[h]')#time column to index.
As Wli mentioned, this is a bug that is still to be fixed, but as a workaround the following worked for me:
plt.plot(s.index,s.values)

Categories