Formatting a plot in Seaborn - python

I made a PMF plot using seaborn:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
n= 1000 #number of trials
p= 0.5 #probability
trial_2 = np.random.binomial(n,p,1000)
sns.displot(trial_2, stat = 'probability')
trial_2_mean= np.mean(trial_2)
plt.axvline(trial_2_mean,color='red')
plt.xlabel("Number of Successes")
red_patch = mpatches.Patch(color='red', label='Mean')
plt.legend(handles=[red_patch])
I want to add text to the plot like below (the n=60 and p=0.1):
Also how do I plot in a format similar to the one in the picture (straight lines)

You can do following:
from scipy.stats import binom
n = 50
p = 0.1
x = [x for x in range(15)]
trial_2 = binom.pmf(x, n, p)
sns.scatterplot(x, trial_2,label=('$n=50, p=0.1$'))
plt.vlines(x, 0, trial_2, colors='red', lw=3, alpha=0.4)
plt.xticks(x)
plt.ylabel('Probability')
plt.xlabel('Number of Successes')
plt.show()
Produces:

Related

How can I plot with a fewer markers than the data points?

My data on x goes from 3MHz to 12MHz, I don't want show all those points on the x-axis instead I want to show an interval of from 3MHz to 12MHz spaced out one 1MHz a part.
Here is an example code.
import numpy as np
import matplotlib.pyplot as plt
x_array = np.arange(3000000, 12000000)
y_array = np.arange(3000000, 12000000)
plt.plot(x_array, y_array)
plt.show()
I want the x-axis first marker to to be 3MHz second marker to be 4MHz and so on up to 12MHz.
You want to have MHz on the x-axis? then use mega Hertz in the definition of the x array and multiply by 10⁶ when you use the array as a frequency in a subsequent calculation
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(3, 12, 901)
f = x*1E6
def H(f) : return f
plt.plot(x, H(f))
plt.xticks(range(3, 13))
plt.xlabel('Frequency/MHz')
plt.ylabel('Transfer function')
plt.grid()
plt.show()
You can change the matplotlib xticks using the following:
plt.xticks(np.arange(3000000, 12000000, step=1000000))
Defining the step will ensure you have 1MHz space.
You can find more here:
https://numpy.org/doc/stable/reference/generated/numpy.arange.html
https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.xticks.html
Edit:
If you want to have the MHz on the x ticks, you can do the following:
import numpy as np
import matplotlib.pyplot as plt
x_array = np.arange(3000000, 12000000)
y_array = np.arange(3000000, 12000000)
plt.plot(x_array, y_array)
plt.xticks(ticks=np.arange(3, 13)*1e6, labels=[str(t) + "MHz" for t in np.arange(3, 13)])
plt.show()
Output: https://i.stack.imgur.com/JdIM3.png
Otherwise, you can do as mentioned in the other answer:
import numpy as np
import matplotlib.pyplot as plt
x_array = np.arange(3000000, 12000000)
y_array = np.arange(3000000, 12000000)
plt.plot(x_array, y_array)
plt.xticks(ticks=np.arange(3, 13)*1e6, labels=np.arange(3, 13))
plt.xlabel("Frequency (MHz)")
plt.show()
Output: https://i.stack.imgur.com/Vc1gb.png2

Fixing mollweide matplotlib projection contours

I am having problems in making mollweide plots in the borders. The lines do not continue on the other side of the plot.
Is there any way to fix this (the green curve should continue in the other side of the sphere )? I am using matplotlib projections. The code is plotting circles of known radius and known center but matplotlib is just cutting the lines. How I could solve this?
import math
import numpy as np
import getdist.plots as plots
import matplotlib.pyplot as plt
import matplotlib.ticker
import matplotlib
import scipy
import pandas as pd
from scipy.stats import norm
from matplotlib import rc
from getdist import loadMCSamples
from getdist import loadMCSamples
from getdist import covmat
from getdist import MCSamples
from tabulate import tabulate
from scipy.optimize import curve_fit
from matplotlib.projections.geo import GeoAxes
from mpl_toolkits.mplot3d import Axes3D
class ThetaFormatterShiftPi(GeoAxes.ThetaFormatter):
"""Shifts labelling by pi
Shifts labelling from -180,180 to 0-360"""
def __call__(self, x, pos=None):
if x != 0:
x *= -1
if x < 0:
x += 2*np.pi
return GeoAxes.ThetaFormatter.__call__(self, x, pos)
mean1024 = [1,186,48]
sigma1024 = 30
x = np.linspace(-6.0, 6.0, 100)
y = np.linspace(-6.0, 6.0, 100)
X, Y = np.meshgrid(x,y)
l = (360.-mean1024[1])/(180/np.pi)
b = (mean1024[2])/(180/np.pi)
F = (X-l)**2 + (Y-b)**2 - (sigma1024/(180/np.pi))**2
F2 = (X-l)**2 + (Y-b)**2 - (2*sigma1024/(180/np.pi))**2
fig, axs = plt.subplots(figsize=(15,10))
axs = plt.subplot(projection="mollweide")
axs.set_longitude_grid(45)
axs.xaxis.set_major_formatter(ThetaFormatterShiftPi(45))
axs.set_latitude_grid(45)
axs.set_longitude_grid_ends(90)
plt.grid(True)
axs.contour(X,Y,F,[0], linewidths=1.5, colors = ['g'])
axs.contour(X,Y,F2,[0], linewidths=1.5, colors = ['g'])
plt.plot(l, b, '+', color = 'green')
box = axs.get_position()
axs.set_position([box.x0, box.y0, box.width * 0.8, box.height*0.8])
axs.legend(loc='lower right', bbox_to_anchor=(1.1, -0.2))
fig.savefig('circles.png')
plt.close()

How can I do a histogram with 1D gaussian mixture with sklearn?

I would like to do an histogram with mixture 1D gaussian as the picture.
Thanks Meng for the picture.
My histogram is this:
I have a file with a lot of data (4,000,000 of numbers) in a column:
1.727182
1.645300
1.619943
1.709263
1.614427
1.522313
And I'm using the follow script with modifications than Meng and Justice Lord have done :
from matplotlib import rc
from sklearn import mixture
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
import matplotlib.ticker as tkr
import scipy.stats as stats
x = open("prueba.dat").read().splitlines()
f = np.ravel(x).astype(np.float)
f=f.reshape(-1,1)
g = mixture.GaussianMixture(n_components=3,covariance_type='full')
g.fit(f)
weights = g.weights_
means = g.means_
covars = g.covariances_
plt.hist(f, bins=100, histtype='bar', density=True, ec='red', alpha=0.5)
plt.plot(f,weights[0]*stats.norm.pdf(f,means[0],np.sqrt(covars[0])), c='red')
plt.rcParams['agg.path.chunksize'] = 10000
plt.grid()
plt.show()
And when I run the script, I have the follow plot:
So, I don't have idea how put the start and end of all gaussians that must be there. I'm new in python and I'm confuse with the way to use the modules. Please, Can you help me and guide me how can I do this plot?
Thanks a lot
Although this is a reasonably old thread, I would like to provide my take on it. I believe my answer can be more comprehensible to some. Moreover, I include a test to check whether or not the desired number of components makes statistical sense via the BIC criterion.
# import libraries (some are for cosmetics)
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, AutoMinorLocator)
import astropy
from scipy.stats import norm
from sklearn.mixture import GaussianMixture as GMM
import matplotlib as mpl
mpl.rcParams['axes.linewidth'] = 1.5
mpl.rcParams.update({'font.size': 15, 'font.family': 'STIXGeneral', 'mathtext.fontset': 'stix'})
# create the data as in #Meng's answer
x = np.concatenate((np.random.normal(5, 5, 1000), np.random.normal(10, 2, 1000)))
x = x.reshape(-1, 1)
# first of all, let's confirm the optimal number of components
bics = []
min_bic = 0
counter=1
for i in range (10): # test the AIC/BIC metric between 1 and 10 components
gmm = GMM(n_components = counter, max_iter=1000, random_state=0, covariance_type = 'full')
labels = gmm.fit(x).predict(x)
bic = gmm.bic(x)
bics.append(bic)
if bic < min_bic or min_bic == 0:
min_bic = bic
opt_bic = counter
counter = counter + 1
# plot the evolution of BIC/AIC with the number of components
fig = plt.figure(figsize=(10, 4))
ax = fig.add_subplot(1,2,1)
# Plot 1
plt.plot(np.arange(1,11), bics, 'o-', lw=3, c='black', label='BIC')
plt.legend(frameon=False, fontsize=15)
plt.xlabel('Number of components', fontsize=20)
plt.ylabel('Information criterion', fontsize=20)
plt.xticks(np.arange(0,11, 2))
plt.title('Opt. components = '+str(opt_bic), fontsize=20)
# Since the optimal value is n=2 according to both BIC and AIC, let's write down:
n_optimal = opt_bic
# create GMM model object
gmm = GMM(n_components = n_optimal, max_iter=1000, random_state=10, covariance_type = 'full')
# find useful parameters
mean = gmm.fit(x).means_
covs = gmm.fit(x).covariances_
weights = gmm.fit(x).weights_
# create necessary things to plot
x_axis = np.arange(-20, 30, 0.1)
y_axis0 = norm.pdf(x_axis, float(mean[0][0]), np.sqrt(float(covs[0][0][0])))*weights[0] # 1st gaussian
y_axis1 = norm.pdf(x_axis, float(mean[1][0]), np.sqrt(float(covs[1][0][0])))*weights[1] # 2nd gaussian
ax = fig.add_subplot(1,2,2)
# Plot 2
plt.hist(x, density=True, color='black', bins=np.arange(-100, 100, 1))
plt.plot(x_axis, y_axis0, lw=3, c='C0')
plt.plot(x_axis, y_axis1, lw=3, c='C1')
plt.plot(x_axis, y_axis0+y_axis1, lw=3, c='C2', ls='dashed')
plt.xlim(-10, 20)
#plt.ylim(0.0, 2.0)
plt.xlabel(r"X", fontsize=20)
plt.ylabel(r"Density", fontsize=20)
plt.subplots_adjust(wspace=0.3)
plt.show()
plt.close('all')
It's all about reshape.
First, you need to reshape f.
For pdf, reshape before using stats.norm.pdf. Similarly, sort and reshape before plotting.
from matplotlib import rc
from sklearn import mixture
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
import matplotlib.ticker as tkr
import scipy.stats as stats
# x = open("prueba.dat").read().splitlines()
# create the data
x = np.concatenate((np.random.normal(5, 5, 1000),np.random.normal(10, 2, 1000)))
f = np.ravel(x).astype(np.float)
f=f.reshape(-1,1)
g = mixture.GaussianMixture(n_components=3,covariance_type='full')
g.fit(f)
weights = g.weights_
means = g.means_
covars = g.covariances_
plt.hist(f, bins=100, histtype='bar', density=True, ec='red', alpha=0.5)
f_axis = f.copy().ravel()
f_axis.sort()
plt.plot(f_axis,weights[0]*stats.norm.pdf(f_axis,means[0],np.sqrt(covars[0])).ravel(), c='red')
plt.rcParams['agg.path.chunksize'] = 10000
plt.grid()
plt.show()

Matplotlib discretize colorbar between given values

I have the plot bellow and I would like to discretize the colormap between 0 and 20. Could anyone help with that?
Here is the code:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import BoundaryNorm
from matplotlib.ticker import MaxNLocator
epi='epi'
with open(epi, 'r') as f2:
lines = f2.readlines()
data = [line.split() for line in lines]
a = np.array(data)
print a.shape
lat = a[:,0]
lat1=list(lat)
lat2=np.asarray(lat1).astype(float)
lon = a[:,1]
lon1=list(lon)
lon2=np.asarray(lon).astype(float)
x_space = 60
y_space = x_space*1.7
gridx = np.linspace(-8.8, -7.0, x_space)
gridy = np.linspace(38, 39.5, y_space )
grid, _, _ = np.histogram2d(lat2, lon2, bins=[gridy, gridx])
cmap = plt.get_cmap('hot_r')
plt.figure()
plt.axis((-8.8,-7.0,38.2,39))
plt.pcolormesh(gridx, gridy, grid,cmap=cmap)
plt.colorbar()
plt.show()
If you want a coarsely discretized colormap, you can change your get_cmap call and include the number of different (discrete) colors you want:
import matplotlib.pylab as pl
import numpy as np
data = np.random.random([10,10]) * 40
hot2 = pl.cm.get_cmap('hot', 20)
pl.figure()
pl.subplot(121)
pl.pcolormesh(data, cmap=pl.cm.hot, vmin=0, vmax=20)
pl.colorbar()
pl.subplot(122)
pl.pcolormesh(data, cmap=hot2, vmin=0, vmax=20)
pl.colorbar()

Reproduce two distributions as provided on a single plot using Python

I want to draw distributions like shown in figure below -- tail of distributions. I have tried following but not quite getting there:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
import math
mean1 = 0
variance1 = 1
sigma1 = math.sqrt(variance1)
x = np.linspace(-3,3.5,100, endpoint=True)
plt.plot(x,mlab.normpdf(x,mean1,sigma1))
mean2 = 0.4
variance2 = 2
sigma2 = math.sqrt(variance2)
y = np.linspace(-4,3.5,100, endpoint=False)
plt.plot(x,mlab.normpdf(y,mean2,sigma2))
##plt.axis('off')
plt.yticks([])
plt.xticks([])
plt.show()
Any suggestions would be appreciative?
You want fill_between:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
import math
mean1 = 0
variance1 = 1
sigma1 = math.sqrt(variance1)
x = np.linspace(-3,3.5,100, endpoint=True)
y1 = mlab.normpdf(x,mean1,sigma1)
fig, ax = plt.subplots()
ax.plot(x,y1)
mean2 = 0.4
variance2 = 2
sigma2 = math.sqrt(variance2)
y = np.linspace(-4,3.5,100, endpoint=False)
y2 = mlab.normpdf(y,mean2,sigma2)
ax.plot(x,y2)
ax.fill_between(x[:30], y1[:30], color='blue')
ax.fill_between(x[:30], y2[:30], color='green')
ax.fill_between(x[-30:], y1[-30:], y2[-30:], color='red', alpha=0.5)
ax.set_yticks([])
ax.set_xticks([])
plt.savefig('fill_norms.png')
plt.show()
This is a crazy simple example -- see the cookbook examples and look at the where clause; your fill-between highlights can adapt to changes in the lines you're plotting (e.g., an automatic red fill_between everywhere BADTHING exceeds GOODTHING, without your having to figure out the index (30 or -30 in this example)).

Categories