Python - Bland-Altman Plot with Text Customization

Python - Bland-Altman Plot with Text Customization - python

I am trying to create a Bland-Altman plot with the annotation text on the left side of the plot, instead of on the right-hand side as in the default configuration.
This is my code
import pandas as pd
df = pd.DataFrame({'A': [5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9,
10, 11, 13, 14, 14, 15, 18, 22, 25],
'B': [4, 4, 5, 5, 5, 7, 8, 6, 9, 7, 7, 11,
13, 13, 12, 13, 14, 19, 19, 24]})
import statsmodels.api as sm
import matplotlib.pyplot as plt
#create Bland-Altman plot
f, ax = plt.subplots(1, figsize = (8,5))
sm.graphics.mean_diff_plot(df.A, df.B, ax = ax)
#display Bland-Altman plot
plt.show()
So I want to have the "mean", the "SD+" and the "SD-" on the left side of the X-axis, not on the right.
thanks for your help or any suggestions!

I don't know of a way to do that with statsmodels, but you can build the plot yourself with pyplot, like so:
# Hand-drawn Bland-Altman plot so the annotation text can sit on the left.
# Expects `df` (columns A and B) and `plt` (matplotlib.pyplot) to be in scope.
diff = df.A - df.B
# A Bland-Altman plot puts the mean of each measurement pair on the x-axis
# (matching the "Means" axis label below), not one of the raw series.
means = (df.A + df.B) / 2
mean_diff = diff.mean()
# Half-width of the limits of agreement: 1.96 standard deviations.
diff_range = diff.std() * 1.96

# Horizontal extent of the reference lines and left-hand anchor of the labels.
x_lo = means.min() - 2
x_hi = means.max() + 2
text_x = means.min() - 1
# Small vertical offset so each label sits just above its line.
text_pad = .05 * diff_range

plt.figure(figsize=(9, 6))
plt.scatter(means, diff, alpha=.5)

# Solid line at the mean difference.
plt.hlines(mean_diff, x_lo, x_hi, color="k", linewidth=1)
plt.text(
    text_x, mean_diff + text_pad, "mean diff: %.2f" % mean_diff,
    fontsize=13,
)

# Dashed lines at the upper and lower limits of agreement.
upper = mean_diff + diff_range
lower = mean_diff - diff_range
plt.hlines(
    [upper, lower],
    x_lo, x_hi, color="k", linewidth=1,
    linestyle="--",
)
plt.text(
    text_x, upper + text_pad,
    "+SD1.96: %.2f" % upper,
    fontsize=13,
)
plt.text(
    text_x, lower + text_pad,
    "-SD1.96: %.2f" % lower,
    fontsize=13,
)

plt.xlim(x_lo, x_hi)
plt.ylim(mean_diff - diff_range * 1.5, mean_diff + diff_range * 1.5)
plt.xlabel("Means", fontsize=15)
plt.ylabel("Difference", fontsize=15)
plt.show()
result:

Related

How do I set the width of the rowLabels column on matplotlib?

It seems the code below should set the width of the column containing the row labels, yet it only sets the width of the other columns.
I've also tried passing a colWidths argument but the result is the same.
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.axis('tight')
ax.axis('off')
colLabels = ['ColA', 'ColB', 'ColC', 'ColD', 'ColE']
rowLabels = ['Row1', 'Row2', 'Row3', 'Row4']
dados = [
[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10],
[11, 12, 13, 14, 15],
[16, 17, 18, 19, 20],
]
table = ax.table(
cellText=dados, loc='center',
colLabels=colLabels,
rowLabels=rowLabels,
# `colWidths=[0.15, 0.15, 0.15, 0.15, 0.15, 0.15, ],` # <- same result
)
for cell in table.get_celld().values():
cell.set_width(.15)
print(cell.get_text().get_text()) # shows that the column is iterated through
plt.show()

The column containing the "Row" titles tries to automatically set the width. To stop this happening you can reset table._autoColumns to an empty list, e.g.,:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.axis('tight')
ax.axis('off')
colLabels = ['ColA', 'ColB', 'ColC', 'ColD', 'ColE']
rowLabels = ['Row1', 'Row2', 'Row3', 'Row4']
dados = [
[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10],
[11, 12, 13, 14, 15],
[16, 17, 18, 19, 20],
]
table = ax.table(
cellText=dados, loc='center',
colLabels=colLabels,
rowLabels=rowLabels,
# `colWidths=[0.15, 0.15, 0.15, 0.15, 0.15, 0.15, ],` # <- same result
)
table._autoColumns = [] # empty the _autoColumns
for cell in table.get_celld().values():
cell.set_width(.15)
print(cell.get_text().get_text()) # shows that the column is iterated through
plt.show()

How to hist() plot each data array row of a 2d NumPy array with Matplotlib?

I have a 3x11 2d ndarray for which I would like to draw matplotlib histograms. I want a histogram of each array row, all in one subplot. I tried supplying the ndarray directly but discovered that matplotlib produces a histogram for each column of the ndarray, which is not what I want. How can I achieve my objective? Presently, I have to explicitly call hist() for each row, and I would prefer to avoid this approach.
import numpy as np
import matplotlib.pyplot as plt

d = np.array([[1, 2, 2, 2, 3, 1, 3, 1, 2, 4, 5],
              [4, 4, 5, 5, 3, 6, 6, 7, 6, 5, 7],
              [5, 6, 7, 7, 8, 8, 9, 10, 11, 12, 10]])
print('\nd', d)

# Same bin edges for every histogram so the subplots are comparable.
bin_edges = [2, 4, 6, 8, 10, 12]

fig, ax = plt.subplots(4, 1)
# Whole 2-D array passed as-is: this is the call whose per-column grouping
# the question is about.
dcount, dbins, dignored = ax[0].hist(d, bins=bin_edges, histtype='bar', label='d')
# One explicit hist() call per row (the workaround the question wants to avoid).
d0count, d0bins, d0ignored = ax[1].hist(d[0, :], bins=bin_edges, histtype='bar', label='d0', alpha=0.2)
d1count, d1bins, d1ignored = ax[2].hist(d[1, :], bins=bin_edges, histtype='bar', label='d1', alpha=0.2)
d2count, d2bins, d2ignored = ax[3].hist(d[2, :], bins=bin_edges, histtype='bar', label='d2', alpha=0.2)

for axis in ax:
    axis.legend()

print('\ndcount', dcount)
print('\ndbins', dbins)
print('\ndignored', dignored)
print('\nd0count', d0count)
print('\nd0bins', d0bins)
print('\nd0ignored', d0ignored)
# Each label now prints its own row's results; previously the d1 and d2
# labels all printed the d0 values.
print('\nd1count', d1count)
print('\nd1bins', d1bins)
print('\nd1ignored', d1ignored)
print('\nd2count', d2count)
print('\nd2bins', d2bins)
print('\nd2ignored', d2ignored)
plt.show()

# import needed packages
import numpy as np
import matplotlib.pyplot as plt
Create data to plot
Using list comprehension and numpy.random.normal:
gaussian0=[np.random.normal(loc=0, scale=1.5) for _ in range(100)]
gaussian1=[np.random.normal(loc=2, scale=0.5) for _ in range(100)]
gaussians = [gaussian0, gaussian1]
Plot with one hist call only
for gaussian in gaussians:
plt.hist(gaussian,alpha=0.5)
plt.show()
Resulting in:

I found a simpler way. Transpose d. That is, replace
dcount, dbins, dignored = ax[0].hist( d, bins=[2, 4, 6, 8, 10, 12], histtype='bar', label='d' )
with
dcount, dbins, dignored = ax[0].hist( d.T, bins=[2, 4, 6, 8, 10, 12], histtype='bar', label=['d0', 'd1','d2'], alpha=0.5 )
I was hoping matplotlib's hist() command would have an option to do this, but I did not find one. Transposing the NumPy array worked. I wonder if this is the usual way matplotlib users do it?

Issue with plotting normal distribution curve with available set of values

I'm trying to plot a normal distribution curve for a set of values. Unfortunately, the below code (taken from this post) doesn't seem to be plotting the curve correctly over the histograms (please refer attached image). I'm sure I'm missing something or have done something silly but can't seem to figure out. Can someone please help? I've included my code below - I'm getting the values from a dataframe but have included these as a list s for convenience:
import numpy as np
import scipy
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
from matplotlib.mlab import normpdf
mu = 0
sigma = 1
n_bins = 50
s = [8, 8, 4, 4, 1, 14, 0, 10, 1, 4, 21, 9, 5, 2, 7, 6, 7, 9, 7, 3, 3, 4, 7, 9, 9, 4, 10, 8, 10, 10, 7, 10, 1, 8, 7, 8, 1, 7, 4, 15, 8, 1, 1, 6, 7, 3, 8, 8, 8, 4]
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)
#histogram
n, bins, patches = axes[1].hist(s, n_bins, normed=True, alpha=.1, edgecolor='black' )
pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
print(pdf)
median, q1, q3 = np.percentile(s, 50), np.percentile(s, 25), np.percentile(s, 75)
#probability density function
axes[1].plot(bins, pdf, color='orange', alpha=.6)
#to ensure pdf and bins line up to use fill_between.
bins_1 = bins[(bins >= q1-1.5*(q3-q1)) & (bins <= q1)] # to ensure fill starts from Q1-1.5*IQR
bins_2 = bins[(bins <= q3+1.5*(q3-q1)) & (bins >= q3)]
pdf_1 = pdf[:int(len(pdf)/2)]
pdf_2 = pdf[int(len(pdf)/2):]
pdf_1 = pdf_1[(pdf_1 >= norm(mu,sigma).pdf(q1-1.5*(q3-q1))) & (pdf_1 <= norm(mu,sigma).pdf(q1))]
pdf_2 = pdf_2[(pdf_2 >= norm(mu,sigma).pdf(q3+1.5*(q3-q1))) & (pdf_2 <= norm(mu,sigma).pdf(q3))]
#fill from Q1-1.5*IQR to Q1 and Q3 to Q3+1.5*IQR
#axes[1].fill_between(bins_1, pdf_1, 0, alpha=.6, color='orange')
#axes[1].fill_between(bins_2, pdf_2, 0, alpha=.6, color='orange')
#add text to bottom graph.
axes[1].annotate("{:.1f}%".format(100*norm(mu, sigma).cdf(q1)), xy=((q1-1.5*(q3-q1)+q1)/2, 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3)-norm(mu, sigma).cdf(q1))), xy=(median, 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3+1.5*(q3-q1)-q3)-norm(mu, sigma).cdf(q3))), xy=((q3+1.5*(q3-q1)+q3)/2, 0), ha='center')
axes[1].annotate('q1', xy=(q1, norm(mu, sigma).pdf(q1)), ha='center')
axes[1].annotate('q3', xy=(q3, norm(mu, sigma).pdf(q3)), ha='center')
axes[1].set_ylabel('Probability Density')
#top boxplot
axes[0].boxplot(s, 0, 'gD', vert=False)
axes[0].axvline(median, color='orange', alpha=.6, linewidth=.5)
axes[0].axis('off')
plt.rcParams["figure.figsize"] = (10,10)
plt.subplots_adjust(hspace=0)
plt.show()

You have set mu and sigma arbitrarily to 0 and 1 respectively, but you should calculate them from your actual data:
data = pd.Series(s)
mu = data.mean()
sigma = data.std()
Update with full working example:
import numpy as np
import scipy
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
n_bins = 50
s = [8, 8, 4, 4, 1, 14, 0, 10, 1, 4, 21, 9, 5, 2, 7, 6, 7, 9, 7, 3, 3, 4, 7, 9, 9, 4, 10, 8, 10, 10, 7, 10, 1, 8, 7, 8, 1, 7, 4, 15, 8, 1, 1, 6, 7, 3, 8, 8, 8, 4]
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)
#histogram
n, bins, patches = axes[1].hist(s, n_bins, density=True, alpha=.1, edgecolor='black' )
data = pd.Series(s)
mu = data.mean()
sigma = data.std()
pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
median, q1, q3 = np.percentile(s, 50), np.percentile(s, 25), np.percentile(s, 75)
#probability density function
axes[1].plot(bins, pdf, color='orange', alpha=.6)
#fill from Q1-1.5*IQR to Q1 and Q3 to Q3+1.5*IQR
iqr = 1.5 * (q3-q1)
x1 = np.linspace(q1 - iqr, q1)
x2 = np.linspace(q3, q3 + iqr)
pdf1 = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(x1-mu)**2/(2*sigma**2))
pdf2 = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(x2-mu)**2/(2*sigma**2))
axes[1].fill_between(x1, pdf1, 0, alpha=.6, color='orange')
axes[1].fill_between(x2, pdf2, 0, alpha=.6, color='orange')
#add text to bottom graph.
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q1) -norm(mu, sigma).cdf(q1-iqr))), xy=(q1-iqr/2, 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3) -norm(mu, sigma).cdf(q1) )), xy=(median , 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3+iqr)-norm(mu, sigma).cdf(q3) )), xy=(q3+iqr/2, 0), ha='center')
axes[1].annotate('q1', xy=(q1, norm(mu, sigma).pdf(q1)), ha='center')
axes[1].annotate('q3', xy=(q3, norm(mu, sigma).pdf(q3)), ha='center')
axes[1].set_ylabel('Probability Density')
#top boxplot
axes[0].boxplot(s, 0, 'gD', vert=False)
axes[0].axvline(median, color='orange', alpha=.6, linewidth=.5)
axes[0].axis('off')

Putting it all in a function:
# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)
def CTD(df):
    """Plot a histogram, fitted normal curve and boxplot for each column.

    For every column of ``df`` a new two-row figure is created. The bottom
    axes hold a density histogram, the normal pdf built from the column's
    own mean and standard deviation, shaded regions from Q1 - 1.5*IQR to Q1
    and from Q3 to Q3 + 1.5*IQR annotated with the normal probability mass
    they cover, and vertical lines at the 0/25/50/75/100% quantiles. The
    top axes hold a horizontal boxplot of the same column.

    Parameters
    ----------
    df : DataFrame whose columns are plotted one by one. Columns are
        assumed to be numeric.

    Returns
    -------
    None. The figures are created as a side effect; nothing is shown or
    saved here.
    """
    # Set the default size before any figure exists so that the first
    # figure gets it too (it used to apply only from the second one on).
    plt.rcParams["figure.figsize"] = (18, 10)
    n_bins = 50

    for col in df.columns:
        # Always read from the argument, never from a global frame.
        values = df[col]
        fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)

        # histogram
        n, bins, patches = axes[1].hist(values, n_bins, density=True, alpha=.1, edgecolor='black')

        # Normal distribution parameterised by this column's statistics.
        mu = values.mean()
        sigma = values.std()
        dist = norm(mu, sigma)

        # Median and quartiles of the column being plotted.
        median = np.percentile(values, 50)
        q1 = np.percentile(values, 25)
        q3 = np.percentile(values, 75)

        # probability density function
        axes[1].plot(bins, dist.pdf(bins), color='orange', alpha=.6)

        # fill from Q1-1.5*IQR to Q1 and Q3 to Q3+1.5*IQR
        whisker = 1.5 * (q3 - q1)
        x1 = np.linspace(q1 - whisker, q1)
        x2 = np.linspace(q3, q3 + whisker)
        axes[1].fill_between(x1, dist.pdf(x1), 0, alpha=.6, color='orange')
        axes[1].fill_between(x2, dist.pdf(x2), 0, alpha=.6, color='orange')

        # add text to bottom graph: normal probability mass of each region
        axes[1].annotate("{:.1f}%".format(100 * (dist.cdf(q1) - dist.cdf(q1 - whisker))), xy=(q1 - whisker / 2, 0), ha='center')
        axes[1].annotate("{:.1f}%".format(100 * (dist.cdf(q3) - dist.cdf(q1))), xy=(median, 0), ha='center')
        axes[1].annotate("{:.1f}%".format(100 * (dist.cdf(q3 + whisker) - dist.cdf(q3))), xy=(q3 + whisker / 2, 0), ha='center')
        axes[1].annotate('q1', xy=(q1, dist.pdf(q1)), ha='center')
        axes[1].annotate('q3', xy=(q3, dist.pdf(q3)), ha='center')

        # dashed lines at the quantiles, drawn on the histogram axes
        axes[1].axvline(values.quantile(0), color='b', linestyle='-.')
        axes[1].axvline(values.quantile(0.25), color='g', linestyle='--')
        axes[1].axvline(values.quantile(0.50), color='g', linestyle='--')
        axes[1].axvline(values.quantile(0.75), color='b', linestyle='--')
        axes[1].axvline(values.quantile(1), color='r', linestyle='-.')
        axes[1].set_ylabel('Probability Density')

        # top boxplot
        axes[0].boxplot(values, 0, 'gD', vert=False)
        axes[0].axvline(median, color='orange', alpha=.6, linewidth=.5)
        axes[0].axis('off')
calling function:
CTD(boston)
If this doesn't work for you:
Try this:
# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)
def CTD(df):
    """Draw two side-by-side distribution plots for every column of ``df``.

    The left panel marks the mean (blue) and the median (red); the right
    panel marks the 0, 0.25, 0.50, 0.75 and 1 quantiles. One new figure is
    created per column. Returns None.
    """
    # (quantile, colour, line style) for the right-hand panel, in draw order.
    quantile_marks = (
        (0, 'b', '-.'),
        (0.25, 'g', '--'),
        (0.50, 'g', '--'),
        (0.75, 'b', '--'),
        (1, 'r', '-.'),
    )
    for col in df.columns:
        series = df[col]
        sns.set(rc={'figure.figsize': (24, 6)})
        plt.figure()

        # Left panel: distribution with mean and median.
        plt.subplot(121)
        sns.distplot(series)
        plt.axvline(np.mean(series), color='b', linestyle='--')  # Blue line for mean
        plt.axvline(np.median(series), color='r', linestyle='--')  # Red line for Median

        # Right panel: distribution with quantile markers.
        plt.subplot(122)
        sns.distplot(series)
        for level, colour, dash in quantile_marks:
            plt.axvline(series.quantile(level), color=colour, linestyle=dash)
This creates dashed lines at the quantiles on the KDE plot.

bar graph with wrong width

I want to create a bar graph for a dataframe that contains multiple categories, with a different color for each category. Below is my simplified code and the resulting graph. The top subplot is a regular bar graph in one color; the bottom subplot is color coded, but the bar width is messed up. Any suggestions? Thanks!
import random
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({'Cat': [1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4],
'A': [2, 3, 6, 7, 9, 10, 15, 18, 22, 23, 24, 25],
'B': random.sample(range(1, 20), 12)})
fig = plt.figure(figsize=(15, 15/2.3))
ax = plt.subplot(2, 1, 1)
plt.bar(df.A, df.B)
plt.xlim(0, 30)
ax = plt.subplot(2, 1, 2)
for cat in df.Cat.unique():
df_ = df.loc[(df.Cat==cat), :]
plt.bar(df_.A, df_.B, width=0.5)
plt.xlim(0, 30)
plt.show()

Matplotlib twinx for different scales

I am using matplotlib twinx for graphing multiple variables on the same axes. But I have a problem, for which I can't find a solution. For simplicity, I have attached little code and graph plotted by that code below.
In this picture, I need those bars to be displayed at the bottom of the axes, as shown in picture 2. But in picture 2, the yticks of ax1t remained the same. I also need them to be displayed at the bottom. How can I do that?
Code:
import matplotlib.pyplot as plt
import numpy as np
fig, ax1 = plt.subplots()
ax1.plot([4, 2, 8, 6, 4, 7, 3, 5])
ax1t = ax1.twinx()
ax1t.bar(np.arange(8), [45, 42, 55, 36, 58, 45, 48, 62], alpha=0.4)
plt.show()
Picture 2

I guess this is what you want -
import matplotlib.pyplot as plt
import numpy as np
fig, ax1 = plt.subplots()
ax1.plot([4, 2, 8, 6, 4, 7, 3, 5])
ax1t = ax1.twinx()
ax1t.bar(np.arange(8), [45, 42, 55, 36, 58, 45, 48, 62], alpha=0.4)
ax1t.set_ylim([10,500])
ax1t.set_yticks([10, 50, 90])
plt.show()
Change the y-axis scale using set_ylim and then explicitly pass the y ticks using set_yticks. You can play around with the parameters to adjust the plot to your liking.

from matplotlib examples
import matplotlib.pyplot as plt
import numpy as np
f, (ax1, ax2) = plt.subplots(2, sharex=True, sharey=True)
ax1.plot([4, 2, 8, 6, 4, 7, 3, 5])
ax2.bar(np.arange(8), [45, 42, 55, 36, 58, 45, 48, 62], alpha=0.4)
# Fine-tune figure; make subplots close to each other and hide x ticks for
# all but bottom plot.
f.subplots_adjust(hspace=0)
plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
plt.show()

You could also use Plotly library, which could easily do that with great visualisation.
import plotly.plotly as py
import plotly.graph_objs as go
trace1 = go.Scatter(
x=[0, 1, 2, 3, 4, 5],
y=[1.5, 1, 1.3, 0.7, 0.8, 0.9]
)
trace2 = go.Bar(
x=[0, 1, 2, 3, 4, 5],
y=[1, 0.5, 0.7, -1.2, 0.3, 0.4]
)
data = [trace1, trace2]
py.iplot(data, filename='bar-line')
Result (it is .png format, therefore not interactive)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python - Bland-Altman Plot with Text Customization - python

Related

How do I set the width of the rowLabels column on matplotlib?

How to hist() plot each data array row of a 2d NumPy array with Matplotlib?

Issue with plotting normal distribution curve with available set of values

bar graph with wrong width

Matplotlib twinx for different scales

Categories

Resources