plotting multiple histograms using different line styles python - python

I want to plot multiple histograms using different line styles, as I cannot use color to distinguish them. This works, but the plots still look very similar because two of them have very similar distributions. Can I use different markers such as dots, stars, etc., or is there some better way to distinguish them?
This is what I have
import matplotlib
matplotlib.use('PS')
import matplotlib.pyplot as plt
plt.hist(values1, histtype='step', linestyle=':',label=topic1)
plt.hist(values2, histtype='step', linestyle='--',color=color, label=topic2)
plt.hist(values3,histtype='step', linestyle='solid', label=topic3)
plt.legend(loc="upper right")
plt.legend(frameon=False)
plt.show()
plt.savefig(allplotfile)
plt.close()

You may use hatching, e.g. hatch="\\\\" in the call to hist. I'm not convinced that it looks better, but it's at least an option.
import matplotlib.pyplot as plt
import numpy as np

# Grayscale style: the histograms have to be told apart without color.
plt.style.use("grayscale")
plt.rcParams["figure.figsize"] = (4,3)

# Draw three samples of 100 values each from the same discrete distribution,
# so the resulting histograms are deliberately similar.
np.random.seed(1)
vals = np.arange(2,5.1,0.5)
p = np.array([1,.2,.36,.15,.38,.28,.4])
p = p/np.sum(p)  # normalize the weights so they sum to 1
a = np.random.choice(vals, size=100, p=p)
b = np.random.choice(vals, size=100, p=p)
c = np.random.choice(vals, size=100, p=p)

# Each histogram gets its own line style AND its own hatch pattern.
plt.hist(a, histtype='step', linestyle=':',label="topic1", hatch="\\\\")
plt.hist(b, histtype='step', linestyle='--', label="topic2", hatch="//")
plt.hist(c,histtype='step', linestyle='solid', label="topic3", hatch="++")

# A second plt.legend() call replaces the legend made by the first one, so the
# location and the frame setting must be passed in a single call.
plt.legend(loc="upper right", frameon=False)
plt.show()
Different linewidths may also help: lw=2 etc,
(here I used 1, 2 and 3 as linewidths)
Different shades of gray, combined with alpha settings, may also help:
plt.hist(a, linestyle=':', color=plt.cm.gray(0.1), alpha=0.5)
plt.hist(b, linestyle='--' ,color=plt.cm.gray(0.4), alpha=0.5)
plt.hist(c, linestyle='solid', color=plt.cm.gray(0.8), alpha=0.5)

Related

How to plot a paired histogram using seaborn

I would like to make a paired histogram like the one shown here using the seaborn distplot.
This kind of plot can also be referred to as the back-to-back histogram shown here, or a bihistogram inverted/mirrored along the x-axis as discussed here.
Here is my code:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
green = np.random.normal(20,10,1000)
blue = np.random.poisson(60,1000)
fig, ax = plt.subplots(figsize=(8,6))
sns.distplot(blue, hist=True, kde=True, hist_kws={'edgecolor':'black'}, kde_kws={'linewidth':2}, bins=10, color='blue')
sns.distplot(green, hist=True, kde=True, hist_kws={'edgecolor':'black'}, kde_kws={'linewidth':2}, bins=10, color='green')
ax.set_xticks(np.arange(-20,121,20))
ax.set_yticks(np.arange(0.0,0.07,0.01))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.show()
Here is the output:
When I use the method discussed here (plt.barh), I get the bar plot shown just below, which is not what I am looking for.
Or maybe I haven't understood the workaround well enough...
A simple/short implementation of python-seaborn-distplot similar to these kinds of plots would be perfect. I edited the figure of my first plot above to show the kind of plot I hope to achieve (though y-axis not upside down):
Any leads would be greatly appreciated.
You could use two subplots and invert the y-axis of the lower one and plot with the same bins.
import pandas as pd  # needed for the DataFrame below; np and plt as imported in the question

# Two normally distributed samples (means 0 and 20, standard deviation 5).
df = pd.DataFrame({'a': np.random.normal(0,5,1000), 'b': np.random.normal(20,5,1000)})
fig =plt.figure(figsize=(5,5))
ax = fig.add_subplot(211)   # upper panel
ax2 = fig.add_subplot(212)  # lower panel
bins = np.arange(-20,40)  # same bin edges for both panels
ax.hist(df['a'], bins=bins)
ax2.hist(df['b'],color='orange', bins=bins)
ax2.invert_yaxis()  # flip the lower panel so its bars point downward
edit:
improvements suggested by #mwaskom
# Two stacked panels sharing the x-axis; df is the DataFrame built above.
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(5,5))
bins = np.arange(-20,40)  # same bin edges for both panels
for ax, column, color, invert in zip(axes.ravel(), df.columns, ['teal', 'orange'], [False,True]):
    ax.hist(df[column], bins=bins, color=color)
    if invert:
        # Flip only the panel flagged for inversion (the lower one).
        ax.invert_yaxis()
plt.subplots_adjust(hspace=0)  # remove the vertical gap between the panels
Here is a possible approach using seaborn's distplot.
Seaborn doesn't return the created graphical elements, but the ax can be interrogated. To make sure the ax only contains the elements you want upside down, those elements can be drawn first. Then, all the patches (the rectangular bars) and the lines (the curve for the kde) can be given their height in negative. Optionally the x-axis can be set at y == 0 using ax.spines['bottom'].set_position('zero').
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

green = np.random.normal(20, 10, 1000)
blue = np.random.poisson(60, 1000)
fig, ax = plt.subplots(figsize=(8, 6))

# Draw the distribution that should end up upside down first, so that at this
# point ax.patches and ax.lines contain only its elements.
sns.distplot(green, hist=True, kde=True, hist_kws={'edgecolor': 'black'}, kde_kws={'linewidth': 2}, bins=10,
             color='green')
for p in ax.patches:  # turn the histogram upside down
    p.set_height(-p.get_height())
for line in ax.lines:  # turn the kde curve upside down
    line.set_ydata(-line.get_ydata())

# The second distribution is drawn afterwards and keeps its positive heights.
sns.distplot(blue, hist=True, kde=True, hist_kws={'edgecolor': 'black'}, kde_kws={'linewidth': 2}, bins=10,
             color='blue')
ax.set_xticks(np.arange(-20, 121, 20))
ax.set_yticks(np.arange(0.0, 0.07, 0.01))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Mirror the positive ticks onto the negative side and label both sides with
# absolute values, since the lower half shows negated heights.
pos_ticks = np.array([t for t in ax.get_yticks() if t > 0])
ticks = np.concatenate([-pos_ticks[::-1], [0], pos_ticks])
ax.set_yticks(ticks)
ax.set_yticklabels([f'{abs(t):.2f}' for t in ticks])
ax.spines['bottom'].set_position('zero')  # put the x-axis at y == 0
plt.show()

One legend entry when plotting several curves using one `plot` call

I am creating a grid by plotting several curves using one plot call as:
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
x = np.array([[0,1], [0,1], [0,1]])
y = np.array([[0,0], [1,1], [2,2]])
ax.plot([0,1],[0,2], label='foo', color='b')
ax.plot(x.T, y.T, label='bar', color='k')
ax.legend()
plt.show()
The resulting legend has as many 'bar' entries as there are curves (see below). I would like to have only one legend entry per plot call (in this case, 'bar' only once).
I want this such that I can have other plot commands (e.g. the one plotting the 'foo' curve) whose curves are automatically included in the legend if they have a label. I specifically want to avoid hand-selecting the handles when constructing the legend, but rather use matplotlib's feature to deal with this by yes/no including a label when plotting. How can I achieve this?
Here is one possible solution: You may use the fact that labels starting with an underscore do not produce legend entries. So setting all but the first label to "_" prevents those lines from appearing in the legend.
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
x = np.array([[0,1], [0,1], [0,1]])
y = np.array([[0,0], [1,1], [2,2]])
ax.plot([0,1],[0,2], label='foo', color='b')
lines = ax.plot(x.T, y.T, label='bar', color='k')
plt.setp(lines[1:], label="_")
ax.legend()
plt.show()
The following is one way to do it using the already existing legend handles and labels. You first get the three handles and labels, and then show just the first one. This approach additionally gives you control over not only the order of the handles but also which of them are shown.
ax.plot(x.T, y.T, label='bar', color='k')
handles, labels = ax.get_legend_handles_labels()
ax.legend([handles[0]], [labels[0]], loc='best')
Alternative approach where the legends will only be taken from a particular plot (set of lines) -- ax1 in this case
ax1 = ax.plot(x.T, y.T, label='bar', color='k')
plt.legend(handles=[ax1[0]], loc='best')
Extending it to your problem with two plot calls
ax1 = ax.plot([0,1],[0,2], label='foo', color='b')
ax2 = ax.plot(x.T, y.T, label='bar', color='k')
plt.legend(handles=[ax1[0], ax2[1]], loc='best')
Another alternative using for loops as suggested by #SpghttCd
# Plot the curves one at a time: only the first keeps the plain 'bar' label;
# the others get '_bar', and labels starting with an underscore are left out
# of the legend.
for i, (xs, ys) in enumerate(zip(x, y)):
    ax.plot(xs, ys, label=('' if i == 0 else '_') + 'bar', color='k')
ax.legend()
Maybe not quite elegant, but the easiest and most straightforward way is to make a second plot using a single pair of elements where you prescribe the 'label' you want!
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
x = np.array([[0,1], [0,1], [0,1]])
y = np.array([[0,0], [1,1], [2,2]])
ax.plot([0,1],[0,2], label='foo', color='b')
ax.plot(x.T, y.T, color='k')
ax.plot(x[0].T, y[0].T, label='bar', color='k')
ax.legend()
plt.show()

matplotlib fill_between facecolor alpha vs edgecolor alpha?

Is there a way to either specify different alphas for facecolor vs edgecolor? Or is there a way to plot an alpha filled area with non-alpha edgecolor that also works in the legend?
This is not what I want...
# Raw strings keep the TeX backslashes (\quad, \sigma, \overline) literal
# without relying on Python's handling of invalid escape sequences.
axs.fill_between(xvalues, tupper_w, tlower_w, facecolor='dimgray', edgecolor='dimgray', alpha=0.25, label=r'$measured\quad\sigma$')
axs.fill_between(xvalues, pupper_w, plower_w, facecolor='orange', edgecolor='orange', alpha=0.25, label=r'$predicted\quad\sigma$')
axs.plot(xvalues, tcurvesavg_w, color='dimgray', label=r'$\overline{measured}$', ls='--')
axs.plot(xvalues, pcurvesavg_w, color='orange', label=r'$\overline{predicted}$', ls='--')
This is what I want (but with proper legend):
# Raw strings keep the TeX backslashes (\quad, \sigma, \overline) literal
# without relying on Python's handling of invalid escape sequences.
axs.fill_between(xvalues, tupper, tlower, facecolor='dimgray', alpha=0.25, label=r'$measured\quad\sigma$')
axs.fill_between(xvalues, pupper, plower, facecolor='orange', alpha=0.25, label=r'$predicted\quad\sigma$')
# The opaque thin lines act as the edges of the translucent filled areas.
axs.plot(xvalues, tupper, color='dimgray', lw=0.5)
axs.plot(xvalues, tcurvesavg, color='dimgray', label=r'$\overline{measured}$', ls='--')
axs.plot(xvalues, tlower, color='dimgray', lw=0.5)
axs.plot(xvalues, pupper, color='orange', lw=0.5)
axs.plot(xvalues, pcurvesavg, color='orange', label=r'$\overline{predicted}$', ls='--')
axs.plot(xvalues, plower, color='orange', lw=0.5)
You cannot specify different alpha values via the alpha argument. However you can define each of facecolor and edgecolor with an alpha channel, e.g. for red with 40% opacity
facecolor=(1,0,0,.4)
This is then directly applied in the legend.
import matplotlib.pyplot as plt
import numpy as np

x = np.arange(0.0, 2, 0.01)
y1 = np.sin(2*np.pi*x)
y2 = 1.2*np.sin(4*np.pi*x)-.9

fig, ax = plt.subplots()
# Face and edge colors are RGBA tuples, so each carries its own alpha value
# (0.4 for the faces, 0.5 for the edges) instead of one shared alpha argument.
ax.fill_between(x, y1, y1+.5, facecolor=(1,0,0,.4), edgecolor=(0,0,0,.5), label="Label 1")
# Distinct label so the two areas can be told apart in the legend.
ax.fill_between(x, y2, y2-.5, facecolor=(0,0,1,.4), edgecolor=(0,0,0,.5), label="Label 2")
ax.legend()
plt.show()
Immediately, looking at fill_between alpha and general fill_between documentation it appears to be unsupported. The legend documentation doesn't seem to provide an option for adding your border after plotting either.
In your second code snippet, if you can figure out how to get the plot and fill functions to have a single handle then the legend should automatically format. Something similar to below (adapted from this similar, but not quite duplicate StackExchangePost):
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
xvalues = np.linspace(0,1,11)
tcurvesavg = np.linspace(0,1,11)
p1, = plt.plot(xvalues, tcurvesavg , c='r') # notice the comma!
p2 = plt.fill_between(xvalues, tcurvesavg -0.2, tcurvesavg +0.2, color='r', alpha=0.5)
plt.legend(((p1,p2),), ('Entry',))
plt.show()
(As a non-automated workaround for most matplotlib questions, save as a svg (similar to this post) and add a border in a vector graphics program like Inkscape. You shouldn't lose resolution, and could still put it in reports etc.)

Python - How to have same maximum on multiple histograms

I have two distributions with different spreads, say
a=N.random.normal(0,0.5,500)
b=N.random.normal(1,3.,500)
I want to make a histogram of them where the maximum is at the same level.
If I make normalised histograms with the density=True parameter, the area of each histogram will be 1, but that won't change the fact that the maxima are different.
P.hist(a, histtype='step', lw=2, cumulative=True)
P.hist(b, histtype='step', color='r', lw=2, density=True)
What I want is to make a histogram where it is "normalized" with the maximum values. I mean when maximum of blue and red histogram would be the same, so it could be easy to compare them. Thanks a lot in advance for your help.
I'm not sure I would do that really, but if you want to I think the best way is to add two axes (also so that you can see how tall they really are). For example, see here: https://matplotlib.org/gallery/api/two_scales.html
import numpy as np
from matplotlib import pyplot as plt
fig, ax1 = plt.subplots()
ax1.hist(a, histtype='step', color='b', lw=2, density=True)
ax1.tick_params(axis='y', labelcolor='b')
ax2 = ax1.twinx()
ax2.hist(b, histtype='step', color='r', lw=2, density=True)
ax2.tick_params(axis='y', labelcolor='r')
This gives the following output (which, I think, looks worse than what you obtained; I also changed cumulative=True to density=True in the first plot to be in line with the plot you provided):
Also, strictly speaking this does not make sure that the maxima are really identical. If you want to do that you can force it by doing e.g.
import numpy as np
from matplotlib import pyplot as plt
fig, ax1 = plt.subplots()
n1, _, _ = ax1.hist(a, histtype='step', color='b', lw=2, density=True)
ax1.tick_params(axis='y', labelcolor='b')
ax2 = ax1.twinx()
n2, _, _ = ax2.hist(b, histtype='step', color='r', lw=2, density=True)
ax2.tick_params(axis='y', labelcolor='r')
ax1.set_ylim([0, n1.max()*1.1])
ax2.set_ylim([0, n2.max()*1.1])
The following code would give a the same max as b:
a *= b.max()/a.max()
The cumulative flag in a might break this though and it should be placed before the histograms are generated.

Remove line through points in matplotlib plot

I have the following matplotlib snippet:
fig, ax = plt.subplots(figsize=(6,6))
values = np.random.normal(loc=0, scale=1, size=10)
ax.plot(range(10), values, 'r^', markersize=15, alpha=0.4);
which produces
as planned.
I'd like to make the line invisible where it overlaps with the points so that the points look more joined by the line rather than lying on top of the line. Is it possible to do this, either by making the line invisible where they overlap or by creating a new line object that simply links the points rather than tracing through them?
To be explicit, I do not want the entire line removed, just the sections that overlap with the points.
It is in general hard to let the lines stop at the edges of the markers. The reason is that lines are defined in data coordinates, while the markers are defined in points.
A workaround would be to hide the lines where the markers are. We may think of a three layer system. The lowest layer (zorder=1) contains the lines, just as they are. The layer above contains markers of the same shape and size as those which are to be shown. Yet they would be colored in the same color as the background (usually white). The topmost layer contains the markers as desired.
import matplotlib.pyplot as plt
import numpy as np; np.random.seed(42)
fig, ax = plt.subplots(figsize=(6,5))
def plot_hidden_lines(x,y, ax = None, ms=15, color="r",
                      marker="^", alpha=0.4,**kwargs):
    """Plot a line through (x, y) that is hidden wherever a marker sits.

    Three layers are drawn on top of each other:
    the line itself (zorder=1), background-colored ("w") markers that cover
    the line (zorder=2), and the visible markers (zorder=3).

    Parameters
    ----------
    x, y : data passed unchanged to ``ax.scatter`` and ``ax.plot``.
    ax : axes to draw on; ``plt.gca()`` is used when None.
    ms : marker size; the scatter size ``s`` is ``ms**2``.
    color : color of the visible markers and of the line.
    marker : marker style used for both marker layers.
    alpha : opacity of the visible markers and of the line.
    **kwargs : forwarded to ``ax.plot`` only.
    """
    if ax is None:
        ax = plt.gca()
    # Top layer: the markers as they should be seen.
    ax.scatter(x,y, c=color, s=ms**2, marker=marker, alpha=alpha, zorder=3)
    # Middle layer: opaque white markers of the same shape and size, which
    # cover the line underneath (assumes a white background).
    ax.scatter(x,y, c="w", s=ms**2, marker=marker, alpha=1, zorder=2)
    # Bottom layer: the connecting line.
    ax.plot(x,y, color=color, zorder=1,alpha=alpha,**kwargs)
values1 = np.random.normal(loc=0, scale=1, size=10)
values2 = np.random.normal(loc=0, scale=1, size=10)
x = np.arange(len(values1))
plot_hidden_lines(x,values1)
plot_hidden_lines(x,values2, color="indigo", ms=20, marker="s")
plt.show()
I think the best way to go about it is to overlay the triangles over the lines:
import matplotlib.pyplot as plt
import numpy as np
values = np.random.normal(loc=0, scale=1, size=10)
plt.plot( range(10), values, marker='^', markerfacecolor='red', markersize=15, color='red', linewidth=2)
plt.show()
The program outputs:
If you really want the see-through aspect, I suggest you somehow calculate where the lines overlap with the markers and only draw the lines in between:
import numpy as np
import matplotlib.pyplot as plt
values = np.random.normal(loc= 0, scale=1, size=10)
# some_function and whatever_other_arguments are placeholders: some_function
# is meant to return the y-coordinates at which the segment between point i
# and point i+1 should start and end.
for i in range(9):
    start_coordinate, end_coordinate = some_function(values[i], values[i+1])
    plt.plot([i, i+1], [start_coordinate, end_coordinate], *whatever_other_arguments)
# Draw all ten markers once, after the segments.
plt.scatter(range(10), values, *whatever_other_arguments)
plt.show()
The hard part here is of course calculating these coordinates (if you want to zoom in this won't work), but honestly, given the difficulty of this question, I think you won't find anything much better...

Categories