Reproduce two distributions as provided on a single plot using Python - python

I want to draw distributions like shown in figure below -- tail of distributions. I have tried following but not quite getting there:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
import math
mean1 = 0
variance1 = 1
sigma1 = math.sqrt(variance1)
x = np.linspace(-3,3.5,100, endpoint=True)
plt.plot(x,mlab.normpdf(x,mean1,sigma1))
mean2 = 0.4
variance2 = 2
sigma2 = math.sqrt(variance2)
y = np.linspace(-4,3.5,100, endpoint=False)
plt.plot(x,mlab.normpdf(y,mean2,sigma2))
##plt.axis('off')
plt.yticks([])
plt.xticks([])
plt.show()
Any suggestions would be appreciative?

You want fill_between:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
import math
mean1 = 0
variance1 = 1
sigma1 = math.sqrt(variance1)
x = np.linspace(-3,3.5,100, endpoint=True)
y1 = mlab.normpdf(x,mean1,sigma1)
fig, ax = plt.subplots()
ax.plot(x,y1)
mean2 = 0.4
variance2 = 2
sigma2 = math.sqrt(variance2)
y = np.linspace(-4,3.5,100, endpoint=False)
y2 = mlab.normpdf(y,mean2,sigma2)
ax.plot(x,y2)
ax.fill_between(x[:30], y1[:30], color='blue')
ax.fill_between(x[:30], y2[:30], color='green')
ax.fill_between(x[-30:], y1[-30:], y2[-30:], color='red', alpha=0.5)
ax.set_yticks([])
ax.set_xticks([])
plt.savefig('fill_norms.png')
plt.show()
This is a crazy simple example -- see the cookbook examples and look at the where clause; your fill-between highlights can adapt to changes in the lines you're plotting (e.g., an automatic red fill_between everywhere BADTHING exceeds GOODTHING, without your having to figure out the index (30 or -30 in this example)).

Related

Scale y-axis for really small numbers

I'm trying to scale the y-axis so my errorbars can be seen.
Any help would be appreciated! :)
Here is my current code.
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# if using a Jupyter notebook, include:
%matplotlib inline
x = ntermsList
y = allPmuCycleCountAverages
xerr = 0
yerr = allPmuCycleCountStandardDeviations
fig, ax = plt.subplots()
ax.errorbar(x, y, xerr=xerr, yerr=yerr,fmt='-o')
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_title('Line plot with error bars')
ax.set_xticks(ntermsList)
ax.set_xticklabels(ntermsList)
ax.set_yticks(allPmuCycleCountAverages)
ax.yaxis.grid(True)
plt.show()
I've tried these solutions, but no joy:
plt.ylim(-1, 1)
plt.rcParams["figure.figsize"] = [7.50, 3.50]
plt.rcParams["figure.autolayout"] = True
plt.yticks(np.arange(min(y), max(y)+0.5, 0.01))
I was expecting the y-axis scale to zoom close enough to the points so my errorbars could be seen
Try autoscalling based in y ticks. Here I'm adding some logic that just rescales the y-axis based on the data that is in the visible x-region. As I don't have your data I took random data.
import numpy as np
import random
ntermsList = np.random.randint(low=0, high=10, size=(555,))
allPmuCycleCountAverages = np.random.randint(low=0, high=10, size=(555,))
allPmuCycleCountStandardDeviations = np.random.randint(low=0, high=10, size=(555,))
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# if using a Jupyter notebook, include:
%matplotlib inline
x = ntermsList
y = allPmuCycleCountAverages
xerr = 0
yerr = allPmuCycleCountStandardDeviations
fig, ax = plt.subplots()
ax.errorbar(x, y, xerr=xerr, yerr=yerr,fmt='-o')
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_title('Line plot with error bars')
ax.set_xticks(ntermsList)
ax.set_xticklabels(ntermsList)
ax.set_yticks(allPmuCycleCountAverages)
#plt.setp(ax.get_yticklabels(), rotation=90, horizontalalignment='right')
ax.yaxis.grid(True)
margin =0.1
def get_bottom_top(line):
xd = line.get_xdata()
yd = line.get_ydata()
lo,hi = ax.get_xlim()
y_displayed = yd[((xd>lo) & (xd<hi))]
h = np.max(y_displayed) - np.min(y_displayed)
bot = np.min(y_displayed)-margin*h
top = np.max(y_displayed)+margin*h
return bot,top
lines = ax.get_lines()
bot,top = np.inf, -np.inf
for line in lines:
new_bot, new_top = get_bottom_top(line)
if new_bot < bot: bot = new_bot
if new_top > top: top = new_top
ax.set_ylim(bot,top)
plt.show()
Before Rescalling
After rescalling

Fill between standard deviations on Matplotlib lineplot

I have a line plot which graphs 2 columns of data loaded from an excel file. See Plot:
Matplotlib line plot
I want to display the standard deviation for each line (i.e., column of data) in my plot using the fill between function or something similar (like image 2), however I cannot figure out how to include this. Any help would be great. Please note that I am very new to this...
How I would like my plot to look
Here is the code used for my lineplot:
import pandas as pd
import scipy as sp
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
Probability_fast_gamma_on_theta = pd.read_excel(r'C:\Users\RL\Excel work\Probability_of_a_theta_cycle_containing_fast_gamma.xlsx')
fig5, ax5=plt.subplots()
plt.plot(Probability_fast_gamma_on_theta.MIA)
plt.plot(Probability_fast_gamma_on_theta.CTL)
plt.grid(False)
plt.ylim(0.0, 0.6)
plt.xlim(0.0, 25)
plt.legend(['MIA', 'CTL'], loc='lower right')
plt.show()
This is straightforwardly done using the plt.fill_between() function. A minimal example would be as follows:
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
x = np.linspace(0, 2 * np.pi, 50)
y = np.sin(x) + np.random.randn(len(x)) * 0.03
yerr0 = y - (0.1 + np.random.randn(len(x)) * 0.03)
yerr1 = y + (0.1 + np.random.randn(len(x)) * 0.03)
ax.plot(x, y, color='C0')
plt.fill_between(x, yerr0, yerr1, color='C0', alpha=0.5)

Formatting a plot in Seaborn

I made a PMF plot using seaborn:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
n= 1000 #number of trials
p= 0.5 #probability
trial_2 = np.random.binomial(n,p,1000)
sns.displot(trial_2, stat = 'probability')
trial_2_mean= np.mean(trial_2)
plt.axvline(trial_2_mean,color='red')
plt.xlabel("Number of Successes")
red_patch = mpatches.Patch(color='red', label='Mean')
plt.legend(handles=[red_patch])
I want to add text to the plot like below (the n=60 and p=0.1):
Also how do I plot in a format similar to the one in the picture (straight lines)
You can do following:
from scipy.stats import binom
n = 50
p = 0.1
x = [x for x in range(15)]
trial_2 = binom.pmf(x, n, p)
sns.scatterplot(x, trial_2,label=('$n=50, p=0.1$'))
plt.vlines(x, 0, trial_2, colors='red', lw=3, alpha=0.4)
plt.xticks(x)
plt.ylabel('Probability')
plt.xlabel('Number of Successes')
plt.show()
Produces:

Fill between subplots with matplotlib cmap

I have 2 line plots on the same figure, plotted from pandas dataframes.
I want to fill between them with a gradient/colour map of sorts.
I understand I can do this with a cmap, only it will not work for me (see code below).
General example I found are filling between x axis and line, i do not want that and also i am interested in simplest solution possible for this as i am a begginer at this and complicated, though maybe better code will just make it more confusing honestly.
Code for which fill is plain blue:
import matplotlib.pyplot as plt
import pandas as pd
ax = plt.gca()
df0.plot(kind='line', x='something', y='other', color='orange', ax=ax, legend=False, figsize=(20,10))
df1.plot(kind='line', x='something', y='other2', color='c', ax=ax, legend=False, figsize=(20,10))
ax.fill_between(x=df0['daysInAYear'], y1=df0['other'], y2 = df1['other2'], alpha=0.2, cmap=plt.cm.get_cmap("winter"))
plt.show()
EDIT/UPDATE: DATA EXAMPLE
other is ALWAYS >= other2
other other2 something (same for both)
15.6 -16.0 1
13.9 -26.7 2
13.3 -26.7 3
10.6 -26.1 4
12.8 -15.0 5
Final graph example:
I would like the fill to go from orange on top to blue at the bottom
Edit
In response to the edited question, here is an alternative approach which does the gradient vertically but doesn't use imshow.
import matplotlib.pyplot as plt
from matplotlib import colors, patches
import numpy as np
import pandas as pd
n = 100
nc = 100
x = np.linspace(0, np.pi*5, n)
y1 = [-50.0]
y2 = [50.0]
for ii in range(1, n):
y1.append(y1[ii-1] + (np.random.random()-0.3)*3)
y2.append(y2[ii-1] + (np.random.random()-0.5)*3)
y1 = np.array(y1)
y2 = np.array(y2)
z = np.linspace(0, 10, nc)
normalize = colors.Normalize(vmin=z.min(), vmax=z.max())
cmap = plt.cm.get_cmap('winter')
fig, ax = plt.subplots(1)
for ii in range(len(df['x'].values)-1):
y = np.linspace(y1[ii], y2[ii], nc)
yn = np.linspace(y1[ii+1], y2[ii+1], nc)
for kk in range(nc - 1):
p = patches.Polygon([[x[ii], y[kk]],
[x[ii+1], yn[kk]],
[x[ii+1], yn[kk+1]],
[x[ii], y[kk+1]]], color=cmap(normalize(z[kk])))
ax.add_patch(p)
plt.plot(x, y1, 'k-', lw=1)
plt.plot(x, y2, 'k-', lw=1)
plt.show()
The idea here being similar to that in my original answer, except the trapezoids are divided into nc pieces and each piece is colored separately. This has the advantage of scaling correctly for varying y1[ii], y2[ii] distances, as shown in this comparison,
It does, however, have the disadvantages of being much, much slower than imshow or the horizontal gradient method and of being unable to handle 'crossing' correctly.
The code to generate the second image in the above comparison:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import patches
from matplotlib.path import Path
x = np.linspace(0, 10, n)
y1 = [-50.0]
y2 = [50.0]
for ii in range(1, n):
y1.append(y1[ii-1] + (np.random.random()-0.2)*3)
y2.append(y2[ii-1] + (np.random.random()-0.5)*3)
y1 = np.array(y1)
y2 = np.array(y2)
verts = np.vstack([np.stack([x, y1], 1), np.stack([np.flip(x), np.flip(y2)], 1)])
path = Path(verts)
patch = patches.PathPatch(path, facecolor='k', lw=2, alpha=0.0)
plt.gca().add_patch(patch)
plt.imshow(np.arange(10).reshape(10,-1), cmap=plt.cm.winter, interpolation="bicubic",
origin='upper', extent=[0,10,-60,60], aspect='auto', clip_path=patch,
clip_on=True)
plt.show()
Original
This is a bit of a hack, partly based on the answers in this question. It does seem to work fairly well but works best with higher density along the x axis. The idea is to call fill_between separately for each trapezoid corresponding to x pairs, [x[ii], x[ii+1]]. Here is a complete example using some generated data
import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
import pandas as pd
n = 1000
X = np.linspace(0, np.pi*5, n)
Y1 = np.sin(X)
Y2 = np.cos(X)
Z = np.linspace(0, 10, n)
normalize = colors.Normalize(vmin=Z.min(), vmax=Z.max())
cmap = plt.cm.get_cmap('winter')
df = pd.DataFrame({'x': X, 'y1': Y1, 'y2': Y2, 'z': Z})
x = df['x'].values
y1 = df['y1'].values
y2 = df['y2'].values
z = df['z'].values
for ii in range(len(df['x'].values)-1):
plt.fill_between([x[ii], x[ii+1]], [y1[ii], y1[ii+1]],
[y2[ii], y2[ii+1]], color=cmap(normalize(z[ii])))
plt.plot(x, y1, 'k-', x, y2, 'k-')
plt.show()
This can be generalized to a 2 dimensional color grid but would require non-trivial modification

Plot normal distribution in 3D

I am trying to plot the comun distribution of two normal distributed variables.
The code below plots one normal distributed variable. What would the code be for plotting two normal distributed variables?
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
import math
mu = 0
variance = 1
sigma = math.sqrt(variance)
x = np.linspace(-3, 3, 100)
plt.plot(x,mlab.normpdf(x, mu, sigma))
plt.show()
It sounds like what you're looking for is a Multivariate Normal Distribution. This is implemented in scipy as scipy.stats.multivariate_normal. It's important to remember that you are passing a covariance matrix to the function. So to keep things simple keep the off diagonal elements as zero:
[X variance , 0 ]
[ 0 ,Y Variance]
Here is an example using this function and generating a 3D plot of the resulting distribution. I add the colormap to make seeing the curves easier but feel free to remove it.
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
from mpl_toolkits.mplot3d import Axes3D
#Parameters to set
mu_x = 0
variance_x = 3
mu_y = 0
variance_y = 15
#Create grid and multivariate normal
x = np.linspace(-10,10,500)
y = np.linspace(-10,10,500)
X, Y = np.meshgrid(x,y)
pos = np.empty(X.shape + (2,))
pos[:, :, 0] = X; pos[:, :, 1] = Y
rv = multivariate_normal([mu_x, mu_y], [[variance_x, 0], [0, variance_y]])
#Make a 3D plot
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, rv.pdf(pos),cmap='viridis',linewidth=0)
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
plt.show()
Giving you this plot:
Edit the method used below was deprecated in Matplotlib v2.2 and removed in v3.1
A simpler version is available through matplotlib.mlab.bivariate_normal
It takes the following arguments so you don't need to worry about matrices
matplotlib.mlab.bivariate_normal(X, Y, sigmax=1.0, sigmay=1.0, mux=0.0, muy=0.0, sigmaxy=0.0)
Here X, and Y are again the result of a meshgrid so using this to recreate the above plot:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.mlab import bivariate_normal
from mpl_toolkits.mplot3d import Axes3D
#Parameters to set
mu_x = 0
sigma_x = np.sqrt(3)
mu_y = 0
sigma_y = np.sqrt(15)
#Create grid and multivariate normal
x = np.linspace(-10,10,500)
y = np.linspace(-10,10,500)
X, Y = np.meshgrid(x,y)
Z = bivariate_normal(X,Y,sigma_x,sigma_y,mu_x,mu_y)
#Make a 3D plot
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z,cmap='viridis',linewidth=0)
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
plt.show()
Giving:
The following adaption to #Ianhi's code above returns a contour plot version of the 3D plot above.
import matplotlib.pyplot as plt
from matplotlib import style
style.use('fivethirtyeight')
import numpy as np
from scipy.stats import multivariate_normal
#Parameters to set
mu_x = 0
variance_x = 3
mu_y = 0
variance_y = 15
x = np.linspace(-10,10,500)
y = np.linspace(-10,10,500)
X,Y = np.meshgrid(x,y)
pos = np.array([X.flatten(),Y.flatten()]).T
rv = multivariate_normal([mu_x, mu_y], [[variance_x, 0], [0, variance_y]])
fig = plt.figure(figsize=(10,10))
ax0 = fig.add_subplot(111)
ax0.contour(X, Y, rv.pdf(pos).reshape(500,500))
plt.show()
While the other answers are great, I wanted to achieve similar results while also illustrating the distribution with a scatter plot of the sample.
More details can be found here: Python 3d plot of multivariate gaussian distribution
The results looks like:
And is generated using the following code:
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from scipy.stats import multivariate_normal
# Sample parameters
mu = np.array([0, 0])
sigma = np.array([[0.7, 0.2], [0.2, 0.3]])
rv = multivariate_normal(mu, sigma)
sample = rv.rvs(500)
# Bounds parameters
x_abs = 2.5
y_abs = 2.5
x_grid, y_grid = np.mgrid[-x_abs:x_abs:.02, -y_abs:y_abs:.02]
pos = np.empty(x_grid.shape + (2,))
pos[:, :, 0] = x_grid
pos[:, :, 1] = y_grid
levels = np.linspace(0, 1, 40)
fig = plt.figure()
ax = fig.gca(projection='3d')
# Removes the grey panes in 3d plots
ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
# The heatmap
ax.contourf(x_grid, y_grid, 0.1 * rv.pdf(pos),
zdir='z', levels=0.1 * levels, alpha=0.9)
# The wireframe
ax.plot_wireframe(x_grid, y_grid, rv.pdf(
pos), rstride=10, cstride=10, color='k')
# The scatter. Note that the altitude is defined based on the pdf of the
# random variable
ax.scatter(sample[:, 0], sample[:, 1], 1.05 * rv.pdf(sample), c='k')
ax.legend()
ax.set_title("Gaussian sample and pdf")
ax.set_xlim3d(-x_abs, x_abs)
ax.set_ylim3d(-y_abs, y_abs)
ax.set_zlim3d(0, 1)
plt.show()

Categories