Scatter-plot matrix with lowess smoother - python

What would the Python code be for a scatter-plot matrix with lowess smoothers similar to the following one?
I'm not sure about the original source of the graph. I saw it on this post on CrossValidated. The ellipses define the covariance according to the original post. I'm not sure what the numbers mean.

I adapted the pandas scatter_matrix function and got a decent result:
import pandas as pd
import numpy as np
frame = pd.DataFrame(np.random.randn(100, 4), columns=['A','B','C','D'])
fig = scatter_matrix_lowess(frame, alpha=0.4, figsize=(12,12));
fig.suptitle('Scatterplot matrix with lowess smoother', fontsize=16);
This is the code for scatter_matrix_lowess:
def scatter_matrix_lowess(frame, alpha=0.5, figsize=None, grid=False,
diagonal='hist', marker='.', density_kwds=None,
hist_kwds=None, range_padding=0.05, **kwds):
"""
Draw a matrix of scatter plots with lowess smoother.
This is an adapted version of the pandas scatter_matrix function.
Parameters
----------
frame : DataFrame
alpha : float, optional
amount of transparency applied
figsize : (float,float), optional
a tuple (width, height) in inches
ax : Matplotlib axis object, optional
grid : bool, optional
setting this to True will show the grid
diagonal : {'hist', 'kde'}
pick between 'kde' and 'hist' for
either Kernel Density Estimation or Histogram
plot in the diagonal
marker : str, optional
Matplotlib marker type, default '.'
hist_kwds : other plotting keyword arguments
To be passed to hist function
density_kwds : other plotting keyword arguments
To be passed to kernel density estimate plot
range_padding : float, optional
relative extension of axis range in x and y
with respect to (x_max - x_min) or (y_max - y_min),
default 0.05
kwds : other plotting keyword arguments
To be passed to scatter function
Examples
--------
>>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
>>> scatter_matrix_lowess(df, alpha=0.2)
"""
import matplotlib.pyplot as plt
from matplotlib.artist import setp
import pandas.core.common as com
from pandas.compat import range, lrange, lmap, map, zip
from statsmodels.nonparametric.smoothers_lowess import lowess
df = frame._get_numeric_data()
n = df.columns.size
fig, axes = plt.subplots(nrows=n, ncols=n, figsize=figsize, squeeze=False)
# no gaps between subplots
fig.subplots_adjust(wspace=0, hspace=0)
mask = com.notnull(df)
marker = _get_marker_compat(marker)
hist_kwds = hist_kwds or {}
density_kwds = density_kwds or {}
# workaround because `c='b'` is hardcoded in matplotlibs scatter method
kwds.setdefault('c', plt.rcParams['patch.facecolor'])
boundaries_list = []
for a in df.columns:
values = df[a].values[mask[a].values]
rmin_, rmax_ = np.min(values), np.max(values)
rdelta_ext = (rmax_ - rmin_) * range_padding / 2.
boundaries_list.append((rmin_ - rdelta_ext, rmax_+ rdelta_ext))
for i, a in zip(lrange(n), df.columns):
for j, b in zip(lrange(n), df.columns):
ax = axes[i, j]
if i == j:
values = df[a].values[mask[a].values]
# Deal with the diagonal by drawing a histogram there.
if diagonal == 'hist':
ax.hist(values, **hist_kwds)
elif diagonal in ('kde', 'density'):
from scipy.stats import gaussian_kde
y = values
gkde = gaussian_kde(y)
ind = np.linspace(y.min(), y.max(), 1000)
ax.plot(ind, gkde.evaluate(ind), **density_kwds)
ax.set_xlim(boundaries_list[i])
else:
common = (mask[a] & mask[b]).values
ax.scatter(df[b][common], df[a][common],
marker=marker, alpha=alpha, **kwds)
# The following 2 lines are new and add the lowess smoothing
ys = lowess(df[a][common], df[b][common])
ax.plot(ys[:,0], ys[:,1], 'red', linewidth=1)
ax.set_xlim(boundaries_list[j])
ax.set_ylim(boundaries_list[i])
ax.set_xlabel('')
ax.set_ylabel('')
_label_axis(ax, kind='x', label=b, position='bottom', rotate=True)
_label_axis(ax, kind='y', label=a, position='left')
if j!= 0:
ax.yaxis.set_visible(False)
if i != n-1:
ax.xaxis.set_visible(False)
for ax in axes.flat:
setp(ax.get_xticklabels(), fontsize=8)
setp(ax.get_yticklabels(), fontsize=8)
return fig
def _label_axis(ax, kind='x', label='', position='top',
ticks=True, rotate=False):
from matplotlib.artist import setp
if kind == 'x':
ax.set_xlabel(label, visible=True)
ax.xaxis.set_visible(True)
ax.xaxis.set_ticks_position(position)
ax.xaxis.set_label_position(position)
if rotate:
setp(ax.get_xticklabels(), rotation=90)
elif kind == 'y':
ax.yaxis.set_visible(True)
ax.set_ylabel(label, visible=True)
# ax.set_ylabel(a)
ax.yaxis.set_ticks_position(position)
ax.yaxis.set_label_position(position)
return
def _get_marker_compat(marker):
import matplotlib.lines as mlines
import matplotlib as mpl
if mpl.__version__ < '1.1.0' and marker == '.':
return 'o'
if marker not in mlines.lineMarkers:
return 'o'
return marker

Related

Draw error shading bands on line plot - python

Let's say I have 25 lines like this:
x = np.linspace(0, 30, 60)
y = np.sin(x/6*np.pi)
error = np.random.normal(0.1, 0.02, size=y.shape)
y1 = y+ np.random.normal(0, 0.1, size=y.shape)
y2= y+ np.random.normal(0, 0.1, size=y.shape)
plt.plot(x, y, 'k-')
plt.plot(x, y1, 'k-')
plt.plot(x, y2,'k-')
.
.
.
Now, I'd like to make a plot like this: . How do I automatically make these error bars and make the shading given just a bunch of lines, all carrying the same overall shape but with slight variations.
It is not very clear to me how the error variable in your code sample relates to the variations of the y variables. So here I give an example of how to compute and draw an error band based on the random variations of 25 y variables, and I use these same variations to create y error bars on top of the band. The same logic would apply to variations/errors on the x-axis.
Let's first create some random data and see what a line plot of 25 similar lines looks like:
import numpy as np # v 1.19.2
import matplotlib.pyplot as plt # v 3.3.2
rng = np.random.default_rng(seed=1)
x = np.linspace(0, 5*np.pi, 50)
y = np.sin(x)
# error = np.random.normal(0.1, 0.02, size=x.shape) # I leave this out
nb_yfuncs = 25
ynoise = rng.normal(1, 0.1, size=(nb_yfuncs, y.size))
yfuncs = nb_yfuncs*[y] + ynoise
fig, ax = plt.subplots(figsize=(10,4))
for yfunc in yfuncs:
plt.plot(x, yfunc, 'k-')
plt.show()
I use the mean of yfuncs as the baseline variable. I extract the minimum and maximum of yfuncs for each x to compute the error band. I compute error bars that cover the same extent as the error band. Therefore, the errors are asymmetrical relative to the mean which is why they are entered as a 2-D array in the plotting function. The error band is drawn with fill_between and the error bars with errorbar. Here is what the code looks like:
ymean = yfuncs.mean(axis=0)
ymin = yfuncs.min(axis=0)
ymax = yfuncs.max(axis=0)
yerror = np.stack((ymean-ymin, ymax-ymean))
fig, ax = plt.subplots(figsize=(10,4))
plt.fill_between(x, ymin, ymax, alpha=0.2, label='error band')
plt.errorbar(x, ymean, yerror, color='tab:blue', ecolor='tab:blue',
capsize=3, linewidth=1, label='mean with error bars')
plt.legend()
plt.show()
You can do it only with matplot lib as follows:
def plot_with_error_bands(x: np.ndarray, y: np.ndarray, yerr: np.ndarray,
xlabel: str, ylabel: str,
title: str,
curve_label: Optional[str] = None,
error_band_label: Optional[str] = None,
color: Optional[str] = None, ecolor: Optional[str] = None,
linewidth: float = 1.0,
style: Optional[str] = 'default',
capsize: float = 3.0,
alpha: float = 0.2,
show: bool = False
):
"""
note:
- example values for color and ecolor:
color='tab:blue', ecolor='tab:blue'
- capsize is the length of the horizontal line for the error bar. Larger number makes it longer horizontally.
- alpha value create than 0.2 make the error bands color for filling it too dark. Really consider not changing.
- sample values for curves and error_band labels:
curve_label: str = 'mean with error bars',
error_band_label: str = 'error band',
refs:
- for making the seaborn and matplot lib look the same see: https://stackoverflow.com/questions/54522709/my-seaborn-and-matplotlib-plots-look-the-same
"""
if style == 'default':
# use the standard matplotlib
plt.style.use("default")
elif style == 'seaborn' or style == 'sns':
# looks idential to seaborn
import seaborn as sns
sns.set()
elif style == 'seaborn-darkgrid':
# uses the default colours of matplot but with blue background of seaborn
plt.style.use("seaborn-darkgrid")
elif style == 'ggplot':
# other alternative to something that looks like seaborn
plt.style.use('ggplot')
# ax = plt.gca()
# fig = plt.gcf(
# fig, axs = plt.subplots(nrows=1, ncols=1, sharex=True, tight_layout=True)
plt.errorbar(x=x, y=y, yerr=yerr, color=color, ecolor=ecolor,
capsize=capsize, linewidth=linewidth, label=curve_label)
plt.fill_between(x=x, y1=y - yerr, y2=y + yerr, alpha=alpha, label=error_band_label)
plt.grid(True)
if curve_label or error_band_label:
plt.legend()
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
if show:
plt.show()
e.g.
def plot_with_error_bands_test():
import numpy as np # v 1.19.2
import matplotlib.pyplot as plt # v 3.3.2
# the number of x values to consider in a given range e.g. [0,1] will sample 10 raw features x sampled at in [0,1] interval
num_x: int = 30
# the repetitions for each x feature value e.g. multiple measurements for sample x=0.0 up to x=1.0 at the end
rep_per_x: int = 5
total_size_data_set: int = num_x * rep_per_x
print(f'{total_size_data_set=}')
# - create fake data set
# only consider 10 features from 0 to 1
x = np.linspace(start=0.0, stop=2*np.pi, num=num_x)
# to introduce fake variation add uniform noise to each feature and pretend each one is a new observation for that feature
noise_uniform: np.ndarray = np.random.rand(rep_per_x, num_x)
# same as above but have the noise be the same for each x (thats what the 1 means)
noise_normal: np.ndarray = np.random.randn(rep_per_x, 1)
# signal function
sin_signal: np.ndarray = np.sin(x)
cos_signal: np.ndarray = np.cos(x)
# [rep_per_x, num_x]
y1: np.ndarray = sin_signal + noise_uniform + noise_normal
y2: np.ndarray = cos_signal + noise_uniform + noise_normal
y1mean = y1.mean(axis=0)
y1err = y1.std(axis=0)
y2mean = y2.mean(axis=0)
y2err = y2.std(axis=0)
plot_with_error_bands(x=x, y=y1mean, yerr=y1err, xlabel='x', ylabel='y', title='Custom Seaborn')
plot_with_error_bands(x=x, y=y2mean, yerr=y2err, xlabel='x', ylabel='y', title='Custom Seaborn')
plt.show()
looks as follows:
if you want to use seaborn check this question out: How to show error bands for pure matrices [Samples, X_Range] with Seaborn error bands?

Setting a clip on a seaborn plot

I am having trouble clipping a seaborn plot (a kdeplot, specifically) as I thought would be fairly simple per this example in the matplotlib docs.
For example, the following code:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
fig = plt.figure()
ax = fig.add_subplot(111, frameon=False, xticks=[], yticks=[])
random_points = np.array([p for p in np.random.random(size=(100, 2)) if 0 < p[0] < 1 and 0 < p[1] < 1])
kde = sns.kdeplot(random_points[:,0], random_points[:,1], ax=ax)
xmin, xmax = kde.get_xlim()
ymin, ymax = kde.get_ylim()
patch = mpl.patches.Circle(((xmin + xmax)/2, (ymin + ymax) / 2), radius=0.4)
ax.add_patch(patch)
kde.set_clip_path(patch)
Results in the following output:
I would like to clip this result so that the KDE contour lines do not appear outside of the circle. I haven't found a way to do it thus far...is this possible?
Serenity's answer works for simple shapes, but breaks down for reasons unknown when the shape contains more than three or so vertices (I had difficulty establishing the exact parameters, even). For sufficiently large shapes the fill flows into where the edge should be, as for example here.
It did get me thinking along the right path, however. While it doesn't seem to be possible to do so simply using matplotlib natives (perhaps there's an error in the code he provided anyway?), it's easy as pie when using the shapely library, which is meant for tasks like this one.
Generating the Shape
In this case you will need shapely's symmetric_difference method. A symmetric difference is the set theoretic name for this cut-out operation.
For this example I've loaded a Manhattan-shaped polygon as a shapely.geometry.Polygon object. I won't covert the initialization process here, it's easy to do, and everything you expect it to be.
We can draw a box around our manhattan using manhattan.envelope, and then apply the difference. This is the following:
unmanhattan = manhattan.envelope.symmetric_difference(manhattan)
Doing which gets us to:
Adding it to the Plot
Ok, but this is a shapely object not a matplotlib Patch, how do we add it to the plot? The descartes library handles this conversion.
unmanhattan_patch = descartes.PolygonPatch(unmanhattan)
This is all we need! Now we do:
unmanhattan_patch = descartes.PolygonPatch(unmanhattan)
ax.add_patch(unmanhattan_patch)
sns.kdeplot(x=points['x_coord'], y=points['y_coord'], ax=ax)
And get:
And with a little bit more work extending this to the rest of the polygons in the view (New York City), we can get the following final result:
I guess your example work only for 'imshow'.
To hide contours lines over the circle you have to plot 'inverse' polygon of desired color.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import seaborn as sns
# Color plot except polygon
def mask_outside(poly_verts, facecolor = None, ax = None):
from matplotlib.patches import PathPatch
from matplotlib.path import Path
if ax is None: ax = plt.gca()
if facecolor is None: facecolor = plt.gcf().get_facecolor()
# Construct inverse polygon
xlim, ylim = ax.get_xlim(), ax.get_ylim()
bound_verts = [(xlim[0], ylim[0]), (xlim[0], ylim[1]),
(xlim[1], ylim[1]), (xlim[1], ylim[0]), (xlim[0], ylim[0])]
bound_codes = [Path.MOVETO] + (len(bound_verts) - 1) * [Path.LINETO]
poly_codes = [Path.MOVETO] + (len(poly_verts) - 1) * [Path.LINETO]
# Plot it
path = Path(bound_verts + poly_verts, bound_codes + poly_codes)
ax.add_patch(PathPatch(path, facecolor = facecolor, edgecolor = 'None', zorder = 1e+3))
# Your example
fig = plt.figure()
ax = fig.add_subplot(111, frameon=False, xticks=[], yticks=[])
random_points = np.array([p for p in np.random.random(size=(100, 2)) if 0 < p[0] < 1 and 0 < p[1] < 1])
kde = sns.kdeplot(random_points[:,0], random_points[:,1], ax=ax)
xmin, xmax = kde.get_xlim()
ymin, ymax = kde.get_ylim()
patch = mpl.patches.Circle(((xmin + xmax) / 2, (ymin + ymax) / 2), radius=0.4)
mask_outside([tuple(x) for x in patch.get_verts()]) # call before add_patch!
ax.add_patch(patch)
plt.show()

matplotlib scatterplot: adding 4th dimension by the marker shape

I would like to add a fourth dimension to the scatter plot by defining the ellipticity of the markers depending on a variable. Is that possible somehow ?
EDIT:
I would like to avoid a 3D-plot. In my opinion these plots are usually not very informative.
You can place Ellipse patches directly onto your axes, as demonstrated in this matplotlib example. To adapt it to use eccentricity as your "third dimension") keeping the marker area constant:
from pylab import figure, show, rand
from matplotlib.patches import Ellipse
import numpy as np
import matplotlib.pyplot as plt
N = 25
# ellipse centers
xy = np.random.rand(N, 2)*10
# ellipse eccentrities
eccs = np.random.rand(N) * 0.8 + 0.1
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
A = 0.1
for pos, e in zip(xy, eccs):
# semi-minor, semi-major axes, b and a:
b = np.sqrt(A/np.pi * np.sqrt(1-e**2))
a = A / np.pi / b
ellipse = Ellipse(xy=pos, width=2*a, height=2*b)
ax.add_artist(ellipse)
ax.set_xlim(0, 10)
ax.set_ylim(0, 10)
show()
Of course, you need to scale your marker area to your x-, y- values in this case.
You can use colorbar as the 4th dimension to your 3D plot. One example is as shown below:
import matplotlib.cm as cmx
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
def scatter3d(x,y,z, cs, colorsMap='jet'):
cm = plt.get_cmap(colorsMap)
cNorm = matplotlib.colors.Normalize(vmin=min(cs), vmax=max(cs))
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm)
fig = plt.figure()
ax = Axes3D(fig)
ax.scatter(x, y, z, c=scalarMap.to_rgba(cs))
scalarMap.set_array(cs)
fig.colorbar(scalarMap,label='Test')
plt.show()
x = np.random.uniform(0,1,50)
y = np.random.uniform(0,1,50)
z = np.random.uniform(0,1,50)
so scatter3D(x,y,z,x+y) produces:
with x+y being the 4th dimension shown in color. You can add your calculated ellipticity depending on your specific variable instead of x+y to get what you want.
To change the ellipticity of the markers you will have to create them manually as such a feature is not implemented yet. However, I believe you can show 4 dimensions with a 2D scatter plot by using color and size as additional dimensions. You will have to take care of the scaling from data to marker size yourself. I added a simple function to handle that in the example below:
import matplotlib.pyplot as plt
import numpy as np
data = np.random.rand(60,4)
def scale_size(data, data_min=None, data_max=None, size_min=10, size_max=60):
# if the data limits are set to None we will just infer them from the data
if data_min is None:
data_min = data.min()
if data_max is None:
data_max = data.max()
size_range = size_max - size_min
data_range = data_max - data_min
return ((data - data_min) * size_range / data_range) + size_min
plt.scatter(data[:,0], data[:,1], c=data[:,2], s=scale_size(data[:,3]))
plt.colorbar()
plt.show()
Result:

Is it possible to change line color in a plot if exceeds a specific range?

Is it possible to change the line color in a plot when values exceeds a certain y value?
Example:
import numpy as np
import matplotlib.pyplot as plt
a = np.array([1,2,17,20,16,3,5,4])
plt.plt(a)
This one gives the following:
I want to visualise the values that exceeds y=15. Something like the following figure:
Or something like this(with cycle linestyle)::
Is it possible?
Define a helper function (this a bare-bones one, more bells and whistles can be added). This code is a slight refactoring of this example from the documentation.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm
def threshold_plot(ax, x, y, threshv, color, overcolor):
"""
Helper function to plot points above a threshold in a different color
Parameters
----------
ax : Axes
Axes to plot to
x, y : array
The x and y values
threshv : float
Plot using overcolor above this value
color : color
The color to use for the lower values
overcolor: color
The color to use for values over threshv
"""
# Create a colormap for red, green and blue and a norm to color
# f' < -0.5 red, f' > 0.5 blue, and the rest green
cmap = ListedColormap([color, overcolor])
norm = BoundaryNorm([np.min(y), threshv, np.max(y)], cmap.N)
# Create a set of line segments so that we can color them individually
# This creates the points as a N x 1 x 2 array so that we can stack points
# together easily to get the segments. The segments array for line collection
# needs to be numlines x points per line x 2 (x and y)
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
# Create the line collection object, setting the colormapping parameters.
# Have to set the actual values used for colormapping separately.
lc = LineCollection(segments, cmap=cmap, norm=norm)
lc.set_array(y)
ax.add_collection(lc)
ax.set_xlim(np.min(x), np.max(x))
ax.set_ylim(np.min(y)*1.1, np.max(y)*1.1)
return lc
Example of usage
fig, ax = plt.subplots()
x = np.linspace(0, 3 * np.pi, 500)
y = np.sin(x)
lc = threshold_plot(ax, x, y, .75, 'k', 'r')
ax.axhline(.75, color='k', ls='--')
lc.set_linewidth(3)
and the output
If you want just the markers to change color, use the same norm and cmap and pass them to scatter as
cmap = ListedColormap([color, overcolor])
norm = BoundaryNorm([np.min(y), threshv, np.max(y)], cmap.N)
sc = ax.scatter(x, y, c=c, norm=norm, cmap=cmap)
Unfortunately, matplotlib doesn't have an easy option to change the color of only part of a line. We will have to write the logic ourselves. The trick is to cut the line up into a collection of line segments, then assign a color to each of them, and then plot them.
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import numpy as np
# The x and y data to plot
y = np.array([1,2,17,20,16,3,5,4])
x = np.arange(len(y))
# Threshold above which the line should be red
threshold = 15
# Create line segments: 1--2, 2--17, 17--20, 20--16, 16--3, etc.
segments_x = np.r_[x[0], x[1:-1].repeat(2), x[-1]].reshape(-1, 2)
segments_y = np.r_[y[0], y[1:-1].repeat(2), y[-1]].reshape(-1, 2)
# Assign colors to the line segments
linecolors = ['red' if y_[0] > threshold and y_[1] > threshold else 'blue'
for y_ in segments_y]
# Stamp x,y coordinates of the segments into the proper format for the
# LineCollection
segments = [zip(x_, y_) for x_, y_ in zip(segments_x, segments_y)]
# Create figure
plt.figure()
ax = plt.axes()
# Add a collection of lines
ax.add_collection(LineCollection(segments, colors=linecolors))
# Set x and y limits... sadly this is not done automatically for line
# collections
ax.set_xlim(0, 8)
ax.set_ylim(0, 21)
Your second option is much easier. We first draw the line and then add the markers as a scatterplot on top of it:
from matplotlib import pyplot as plt
import numpy as np
# The x and y data to plot
y = np.array([1,2,17,20,16,3,5,4])
x = np.arange(len(y))
# Threshold above which the markers should be red
threshold = 15
# Create figure
plt.figure()
# Plot the line
plt.plot(x, y, color='blue')
# Add below threshold markers
below_threshold = y < threshold
plt.scatter(x[below_threshold], y[below_threshold], color='green')
# Add above threshold markers
above_threshold = np.logical_not(below_threshold)
plt.scatter(x[above_threshold], y[above_threshold], color='red')
Basically #RaJa provides the solution, but I think that you can do the same without loading an additional package (pandas), by using masked arrays in numpy:
import numpy as np
import matplotlib.pyplot as plt
a = np.array([1,2,17,20,16,3,5,4])
# use a masked array to suppress the values that are too low
a_masked = np.ma.masked_less_equal(a, 15)
# plot the full line
plt.plot(a, 'k')
# plot only the large values
plt.plot(a_masked, 'r', linewidth=2)
# add the threshold value (optional)
plt.axhline(15, color='k', linestyle='--')
plt.show()
Result:
I don't know wether there is a built-in function in matplolib. But you could convert your numpy array into a pandas series and then use the plot function in combination with boolean selection/masking.
import numpy as np
import pandas as pd
a = np.array([1,2,17,20,16,3,5,4])
aPandas = pd.Series(a)
aPandas.plot()
aPandas[aPandas > 15].plot(color = 'red')

Matplotlib - add colorbar to a sequence of line plots

I have a sequence of line plots for two variables (x,y) for a number of different values of a variable z. I would normally add the line plots with legends like this:
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
# suppose mydata is a list of tuples containing (xs, ys, z)
# where xs and ys are lists of x's and y's and z is a number.
legns = []
for(xs,ys,z) in mydata:
pl = ax.plot(xs,ys,color = (z,0,0))
legns.append("z = %f"%(z))
ax.legends(legns)
plt.show()
But I have too many graphs and the legends will cover the graph. I'd rather have a colorbar indicating the value of z corresponding to the color. I can't find anything like that in the galery and all my attempts do deal with the colorbar failed. Apparently I must create a collection of plots before trying to add a colorbar.
Is there an easy way to do this? Thanks.
EDIT (clarification):
I wanted to do something like this:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
fig = plt.figure()
ax = fig.add_subplot(111)
mycmap = cm.hot
# suppose mydata is a list of tuples containing (xs, ys, z)
# where xs and ys are lists of x's and y's and z is a number between 0 and 1
plots = []
for(xs,ys,z) in mydata:
pl = ax.plot(xs,ys,color = mycmap(z))
plots.append(pl)
fig.colorbar(plots)
plt.show()
But this won't work according to the Matplotlib reference because a list of plots is not a "mappable", whatever this means.
I've created an alternative plot function using LineCollection:
def myplot(ax,xs,ys,zs, cmap):
plot = lc([zip(x,y) for (x,y) in zip(xs,ys)], cmap = cmap)
plot.set_array(array(zs))
x0,x1 = amin(xs),amax(xs)
y0,y1 = amin(ys),amax(ys)
ax.add_collection(plot)
ax.set_xlim(x0,x1)
ax.set_ylim(y0,y1)
return plot
xs and ys are lists of lists of x and y coordinates and zs is a list of the different conditions to colorize each line. It feels a bit like a cludge though... I thought that there would be a more neat way to do this. I like the flexibility of the plt.plot() function.
(I know this is an old question but...) Colorbars require a matplotlib.cm.ScalarMappable, plt.plot produces lines which are not scalar mappable, therefore, in order to make a colorbar, we are going to need to make a scalar mappable.
Ok. So the constructor of a ScalarMappable takes a cmap and a norm instance. (norms scale data to the range 0-1, cmaps you have already worked with and take a number between 0-1 and returns a color). So in your case:
import matplotlib.pyplot as plt
sm = plt.cm.ScalarMappable(cmap=my_cmap, norm=plt.normalize(min=0, max=1))
plt.colorbar(sm)
Because your data is in the range 0-1 already, you can simplify the sm creation to:
sm = plt.cm.ScalarMappable(cmap=my_cmap)
EDIT: For matplotlib v1.2 or greater the code becomes:
import matplotlib.pyplot as plt
sm = plt.cm.ScalarMappable(cmap=my_cmap, norm=plt.normalize(vmin=0, vmax=1))
# fake up the array of the scalar mappable. Urgh...
sm._A = []
plt.colorbar(sm)
EDIT: For matplotlib v1.3 or greater the code becomes:
import matplotlib.pyplot as plt
sm = plt.cm.ScalarMappable(cmap=my_cmap, norm=plt.Normalize(vmin=0, vmax=1))
# fake up the array of the scalar mappable. Urgh...
sm._A = []
plt.colorbar(sm)
EDIT: For matplotlib v3.1 or greater simplifies to:
import matplotlib.pyplot as plt
sm = plt.cm.ScalarMappable(cmap=my_cmap, norm=plt.Normalize(vmin=0, vmax=1))
plt.colorbar(sm)
Here's one way to do it while still using plt.plot(). Basically, you make a throw-away plot and get the colorbar from there.
import matplotlib as mpl
import matplotlib.pyplot as plt
min, max = (-40, 30)
step = 10
# Setting up a colormap that's a simple transtion
mymap = mpl.colors.LinearSegmentedColormap.from_list('mycolors',['blue','red'])
# Using contourf to provide my colorbar info, then clearing the figure
Z = [[0,0],[0,0]]
levels = range(min,max+step,step)
CS3 = plt.contourf(Z, levels, cmap=mymap)
plt.clf()
# Plotting what I actually want
X=[[1,2],[1,2],[1,2],[1,2]]
Y=[[1,2],[1,3],[1,4],[1,5]]
Z=[-40,-20,0,30]
for x,y,z in zip(X,Y,Z):
# setting rgb color based on z normalized to my range
r = (float(z)-min)/(max-min)
g = 0
b = 1-r
plt.plot(x,y,color=(r,g,b))
plt.colorbar(CS3) # using the colorbar info I got from contourf
plt.show()
It's a little wasteful, but convenient. It's also not very wasteful if you make multiple plots as you can call plt.colorbar() without regenerating the info for it.
Here is a slightly simplied example inspired by the top answer given by Boris and Hooked (Thanks for the great idea!):
1. Discrete colorbar
Discrete colorbar is more involved, because colormap generated by mpl.cm.get_cmap() is not a mappable image needed as a colorbar() argument. A dummie mappable needs to generated as shown below:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
n_lines = 5
x = np.linspace(0, 10, 100)
y = np.sin(x[:, None] + np.pi * np.linspace(0, 1, n_lines))
c = np.arange(1, n_lines + 1)
cmap = mpl.cm.get_cmap('jet', n_lines)
fig, ax = plt.subplots(dpi=100)
# Make dummie mappable
dummie_cax = ax.scatter(c, c, c=c, cmap=cmap)
# Clear axis
ax.cla()
for i, yi in enumerate(y.T):
ax.plot(x, yi, c=cmap(i))
fig.colorbar(dummie_cax, ticks=c)
plt.show();
This will produce a plot with a discrete colorbar:
2. Continuous colorbar
Continuous colorbar is less involved, as mpl.cm.ScalarMappable() allows us to obtain an "image" for colorbar().
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
n_lines = 5
x = np.linspace(0, 10, 100)
y = np.sin(x[:, None] + np.pi * np.linspace(0, 1, n_lines))
c = np.arange(1, n_lines + 1)
norm = mpl.colors.Normalize(vmin=c.min(), vmax=c.max())
cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.jet)
cmap.set_array([])
fig, ax = plt.subplots(dpi=100)
for i, yi in enumerate(y.T):
ax.plot(x, yi, c=cmap.to_rgba(i + 1))
fig.colorbar(cmap, ticks=c)
plt.show();
This will produce a plot with a continuous colorbar:
[Side note] In this example, I personally don't know why cmap.set_array([]) is necessary (otherwise we'd get error messages). If someone understand the principles under the hood, please comment :)
As other answers here do try to use dummy plots, which is not really good style, here is a generic code for a
Discrete colorbar
A discrete colorbar is produced in the same way a continuous colorbar is created, just with a different Normalization. In this case a BoundaryNorm should be used.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
n_lines = 5
x = np.linspace(0, 10, 100)
y = np.sin(x[:, None] + np.pi * np.linspace(0, 1, n_lines))
c = np.arange(1., n_lines + 1)
cmap = plt.get_cmap("jet", len(c))
norm = matplotlib.colors.BoundaryNorm(np.arange(len(c)+1)+0.5,len(c))
sm = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
sm.set_array([]) # this line may be ommitted for matplotlib >= 3.1
fig, ax = plt.subplots(dpi=100)
for i, yi in enumerate(y.T):
ax.plot(x, yi, c=cmap(i))
fig.colorbar(sm, ticks=c)
plt.show()

Categories