Related
I'm trying to scale the y-axis so my errorbars can be seen.
Any help would be appreciated! :)
Here is my current code.
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# if using a Jupyter notebook, include:
%matplotlib inline
x = ntermsList
y = allPmuCycleCountAverages
xerr = 0
yerr = allPmuCycleCountStandardDeviations
fig, ax = plt.subplots()
ax.errorbar(x, y, xerr=xerr, yerr=yerr,fmt='-o')
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_title('Line plot with error bars')
ax.set_xticks(ntermsList)
ax.set_xticklabels(ntermsList)
ax.set_yticks(allPmuCycleCountAverages)
ax.yaxis.grid(True)
plt.show()
I've tried these solutions, but no joy:
plt.ylim(-1, 1)
plt.rcParams["figure.figsize"] = [7.50, 3.50]
plt.rcParams["figure.autolayout"] = True
plt.yticks(np.arange(min(y), max(y)+0.5, 0.01))
I was expecting the y-axis scale to zoom close enough to the points so my errorbars could be seen
Try autoscalling based in y ticks. Here I'm adding some logic that just rescales the y-axis based on the data that is in the visible x-region. As I don't have your data I took random data.
import numpy as np
import random
ntermsList = np.random.randint(low=0, high=10, size=(555,))
allPmuCycleCountAverages = np.random.randint(low=0, high=10, size=(555,))
allPmuCycleCountStandardDeviations = np.random.randint(low=0, high=10, size=(555,))
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# if using a Jupyter notebook, include:
%matplotlib inline
x = ntermsList
y = allPmuCycleCountAverages
xerr = 0
yerr = allPmuCycleCountStandardDeviations
fig, ax = plt.subplots()
ax.errorbar(x, y, xerr=xerr, yerr=yerr,fmt='-o')
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_title('Line plot with error bars')
ax.set_xticks(ntermsList)
ax.set_xticklabels(ntermsList)
ax.set_yticks(allPmuCycleCountAverages)
#plt.setp(ax.get_yticklabels(), rotation=90, horizontalalignment='right')
ax.yaxis.grid(True)
margin =0.1
def get_bottom_top(line):
xd = line.get_xdata()
yd = line.get_ydata()
lo,hi = ax.get_xlim()
y_displayed = yd[((xd>lo) & (xd<hi))]
h = np.max(y_displayed) - np.min(y_displayed)
bot = np.min(y_displayed)-margin*h
top = np.max(y_displayed)+margin*h
return bot,top
lines = ax.get_lines()
bot,top = np.inf, -np.inf
for line in lines:
new_bot, new_top = get_bottom_top(line)
if new_bot < bot: bot = new_bot
if new_top > top: top = new_top
ax.set_ylim(bot,top)
plt.show()
Before Rescalling
After rescalling
I am trying to shade the area before the point of intersection of the two curves produced by this example code:
import numpy as np
import matplotlib.pyplot as plt
x = np.arange(0,100,10)
y1 = [0,2,4,6,8,5,4,3,2,1]
y2 = [0,1,3,5,6,8,9,12,13,14]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(t_list,y1,linestyle='-')
ax.plot(t_list,y2,linestyle='--')
plt.show()
Simply using:
ax.fill_between(x,y1,y2,where=y1>=y2,color='grey',alpha='0.5')
Does no work and gives the following error: "ValueError: Argument dimensions are incompatible"
I tried to convert the lists into arrays:
z1 = np.array(y1)
z2 = np.array(y2)
Then:
ax.fill_between(x,y1,y2,where=z1>=z2,color='grey',alpha='0.5')
Not the entire area was shaded.
I know I have to find the point of intersection between the two curves by interpolating but have not seen a simple way to do it.
You are completely right, you need to interpolate. And that is ludicrously complicated, as you need to add the interpolate=True keyword argument to the call to fill_between.
ax.fill_between(x,y1,y2,where=z1>=z2,color='grey', interpolate=True)
Complete code to reproduce:
import numpy as np
import matplotlib.pyplot as plt
x = np.arange(0,100,10)
y1 = [0,2,4,6,8,5,4,3,2,1]
y2 = [0,1,3,5,6,8,9,12,13,14]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x,y1,linestyle='-')
ax.plot(x,y2,linestyle='--')
z1 = np.array(y1)
z2 = np.array(y2)
ax.fill_between(x,y1,y2,where=z1>=z2,color='grey',alpha=0.5, interpolate=True)
plt.show()
I have 2 line plots on the same figure, plotted from pandas dataframes.
I want to fill between them with a gradient/colour map of sorts.
I understand I can do this with a cmap, only it will not work for me (see code below).
General example I found are filling between x axis and line, i do not want that and also i am interested in simplest solution possible for this as i am a begginer at this and complicated, though maybe better code will just make it more confusing honestly.
Code for which fill is plain blue:
import matplotlib.pyplot as plt
import pandas as pd
ax = plt.gca()
df0.plot(kind='line', x='something', y='other', color='orange', ax=ax, legend=False, figsize=(20,10))
df1.plot(kind='line', x='something', y='other2', color='c', ax=ax, legend=False, figsize=(20,10))
ax.fill_between(x=df0['daysInAYear'], y1=df0['other'], y2 = df1['other2'], alpha=0.2, cmap=plt.cm.get_cmap("winter"))
plt.show()
EDIT/UPDATE: DATA EXAMPLE
other is ALWAYS >= other2
other other2 something (same for both)
15.6 -16.0 1
13.9 -26.7 2
13.3 -26.7 3
10.6 -26.1 4
12.8 -15.0 5
Final graph example:
I would like the fill to go from orange on top to blue at the bottom
Edit
In response to the edited question, here is an alternative approach which does the gradient vertically but doesn't use imshow.
import matplotlib.pyplot as plt
from matplotlib import colors, patches
import numpy as np
import pandas as pd
n = 100
nc = 100
x = np.linspace(0, np.pi*5, n)
y1 = [-50.0]
y2 = [50.0]
for ii in range(1, n):
y1.append(y1[ii-1] + (np.random.random()-0.3)*3)
y2.append(y2[ii-1] + (np.random.random()-0.5)*3)
y1 = np.array(y1)
y2 = np.array(y2)
z = np.linspace(0, 10, nc)
normalize = colors.Normalize(vmin=z.min(), vmax=z.max())
cmap = plt.cm.get_cmap('winter')
fig, ax = plt.subplots(1)
for ii in range(len(df['x'].values)-1):
y = np.linspace(y1[ii], y2[ii], nc)
yn = np.linspace(y1[ii+1], y2[ii+1], nc)
for kk in range(nc - 1):
p = patches.Polygon([[x[ii], y[kk]],
[x[ii+1], yn[kk]],
[x[ii+1], yn[kk+1]],
[x[ii], y[kk+1]]], color=cmap(normalize(z[kk])))
ax.add_patch(p)
plt.plot(x, y1, 'k-', lw=1)
plt.plot(x, y2, 'k-', lw=1)
plt.show()
The idea here being similar to that in my original answer, except the trapezoids are divided into nc pieces and each piece is colored separately. This has the advantage of scaling correctly for varying y1[ii], y2[ii] distances, as shown in this comparison,
It does, however, have the disadvantages of being much, much slower than imshow or the horizontal gradient method and of being unable to handle 'crossing' correctly.
The code to generate the second image in the above comparison:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import patches
from matplotlib.path import Path
x = np.linspace(0, 10, n)
y1 = [-50.0]
y2 = [50.0]
for ii in range(1, n):
y1.append(y1[ii-1] + (np.random.random()-0.2)*3)
y2.append(y2[ii-1] + (np.random.random()-0.5)*3)
y1 = np.array(y1)
y2 = np.array(y2)
verts = np.vstack([np.stack([x, y1], 1), np.stack([np.flip(x), np.flip(y2)], 1)])
path = Path(verts)
patch = patches.PathPatch(path, facecolor='k', lw=2, alpha=0.0)
plt.gca().add_patch(patch)
plt.imshow(np.arange(10).reshape(10,-1), cmap=plt.cm.winter, interpolation="bicubic",
origin='upper', extent=[0,10,-60,60], aspect='auto', clip_path=patch,
clip_on=True)
plt.show()
Original
This is a bit of a hack, partly based on the answers in this question. It does seem to work fairly well but works best with higher density along the x axis. The idea is to call fill_between separately for each trapezoid corresponding to x pairs, [x[ii], x[ii+1]]. Here is a complete example using some generated data
import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
import pandas as pd
n = 1000
X = np.linspace(0, np.pi*5, n)
Y1 = np.sin(X)
Y2 = np.cos(X)
Z = np.linspace(0, 10, n)
normalize = colors.Normalize(vmin=Z.min(), vmax=Z.max())
cmap = plt.cm.get_cmap('winter')
df = pd.DataFrame({'x': X, 'y1': Y1, 'y2': Y2, 'z': Z})
x = df['x'].values
y1 = df['y1'].values
y2 = df['y2'].values
z = df['z'].values
for ii in range(len(df['x'].values)-1):
plt.fill_between([x[ii], x[ii+1]], [y1[ii], y1[ii+1]],
[y2[ii], y2[ii+1]], color=cmap(normalize(z[ii])))
plt.plot(x, y1, 'k-', x, y2, 'k-')
plt.show()
This can be generalized to a 2 dimensional color grid but would require non-trivial modification
I want to create a plot for two different datasets similar to the one presented in this answer:
In the above image, the author managed to fix the overlapping problem of the error bars by adding some small random scatter in x to the new dataset.
In my problem, I must plot a similar graphic, but having some categorical data in the x axis:
Any ideas on how to slightly move one the error bars of the second dataset using categorical variables at the x axis? I want to avoid the overlapping between the bars for making the visualization easier.
You can translate each errorbar by adding the default data transform to a prior translation in data space. This is possible when knowing that categories are in general one data unit away from each other.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
from matplotlib.transforms import Affine2D
x = list("ABCDEF")
y1, y2 = np.random.randn(2, len(x))
yerr1, yerr2 = np.random.rand(2, len(x))*4+0.3
fig, ax = plt.subplots()
trans1 = Affine2D().translate(-0.1, 0.0) + ax.transData
trans2 = Affine2D().translate(+0.1, 0.0) + ax.transData
er1 = ax.errorbar(x, y1, yerr=yerr1, marker="o", linestyle="none", transform=trans1)
er2 = ax.errorbar(x, y2, yerr=yerr2, marker="o", linestyle="none", transform=trans2)
plt.show()
Alternatively, you could translate the errorbars after applying the data transform and hence move them in units of points.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
from matplotlib.transforms import ScaledTranslation
x = list("ABCDEF")
y1, y2 = np.random.randn(2, len(x))
yerr1, yerr2 = np.random.rand(2, len(x))*4+0.3
fig, ax = plt.subplots()
trans1 = ax.transData + ScaledTranslation(-5/72, 0, fig.dpi_scale_trans)
trans2 = ax.transData + ScaledTranslation(+5/72, 0, fig.dpi_scale_trans)
er1 = ax.errorbar(x, y1, yerr=yerr1, marker="o", linestyle="none", transform=trans1)
er2 = ax.errorbar(x, y2, yerr=yerr2, marker="o", linestyle="none", transform=trans2)
plt.show()
While results look similar in both cases, they are fundamentally different. You will observe this difference when interactively zooming the axes or changing the figure size.
Consider the following approach to highlight plots - combination of errorbar and fill_between with non-zero transparency:
import random
import matplotlib.pyplot as plt
# create sample data
N = 8
data_1 = {
'x': list(range(N)),
'y': [10. + random.random() for dummy in range(N)],
'yerr': [.25 + random.random() for dummy in range(N)]}
data_2 = {
'x': list(range(N)),
'y': [10.25 + .5 * random.random() for dummy in range(N)],
'yerr': [.5 * random.random() for dummy in range(N)]}
# plot
plt.figure()
# only errorbar
plt.subplot(211)
for data in [data_1, data_2]:
plt.errorbar(**data, fmt='o')
# errorbar + fill_between
plt.subplot(212)
for data in [data_1, data_2]:
plt.errorbar(**data, alpha=.75, fmt=':', capsize=3, capthick=1)
data = {
'x': data['x'],
'y1': [y - e for y, e in zip(data['y'], data['yerr'])],
'y2': [y + e for y, e in zip(data['y'], data['yerr'])]}
plt.fill_between(**data, alpha=.25)
Result:
Threre is example on lib site: https://matplotlib.org/stable/gallery/lines_bars_and_markers/errorbar_subsample.html
enter image description here
You need parameter errorevery=(m, n),
n - how often plot error lines, m - shift with range from 0 to n
I'm trying to plot a CDF from multiple simulation runs using Seaborn. I created a very simple code to emulate my results:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df1 = pd.DataFrame({'A':np.random.randint(0, 100, 1000)})
df2 = pd.DataFrame({'A':np.random.randint(0, 100, 1000)})
df3 = pd.DataFrame({'A':np.random.randint(0, 100, 1000)})
f, ax = plt.subplots(figsize=(8, 8))
ax = sns.kdeplot(df1['A'], cumulative=True)
ax = sns.kdeplot(df2['A'], cumulative=True)
ax = sns.kdeplot(df3['A'], cumulative=True)
plt.show()
The code above creates the following plot:
But, since the three lines are results from the same simulation with different seeds, I'd like to "merge" the three lines into one and add a shaded area around the line, representing min and max or the std of the three different runs.
How can this be accomplished in Seaborn?
You may use fill_between to fill between two curves. Now here the problem is that the kde support would be different for the three curves. Obtaining a common kde support will require to calculate the cdf manually. This could be done as follows.
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
def cdf(data, limits="auto", npoints=600):
kde = stats.gaussian_kde(data)
bw = kde.factor
if limits == "auto":
limits = (data.min(), data.max())
limits = (limits[0]-bw*np.diff(limits)[0],
limits[1]+bw*np.diff(limits)[0])
x = np.linspace(limits[0], limits[1], npoints)
y = [kde.integrate_box(x[0],x[i]) for i in range(len(x))]
return x, np.array(y)
d1 = np.random.randint(14, 86, 1000)
d2 = np.random.randint(10, 100, 1000)
d3 = np.random.randint(0, 90, 1000)
mini = np.min((d1.min(), d2.min(), d3.min()))
maxi = np.max((d1.max(), d2.max(), d3.max()))
x1,y1 = cdf(d1, limits=(mini, maxi))
x2,y2 = cdf(d2, limits=(mini, maxi))
x3,y3 = cdf(d3, limits=(mini, maxi))
y = np.column_stack((y1, y2, y3))
ymin = np.min(y, axis=1)
ymax = np.max(y, axis=1)
f, ax = plt.subplots()
ax.plot(x1,y1)
ax.plot(x2,y2)
ax.plot(x3,y3)
ax.fill_between(x1, ymin, ymax, color="turquoise", alpha=0.4, zorder=0)
plt.show()