Issue with plotting normal distribution curve with available set of values - python

I'm trying to plot a normal distribution curve for a set of values. Unfortunately, the below code (taken from this post) doesn't seem to be plotting the curve correctly over the histograms (please refer attached image). I'm sure I'm missing something or have done something silly but can't seem to figure out. Can someone please help? I've included my code below - I'm getting the values from a dataframe but have included these as a list s for convenience:
import numpy as np
import scipy
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
from matplotlib.mlab import normpdf
# Parameters of the normal curve drawn further down; they are fixed
# constants here and are not estimated from the sample.
mu = 0
sigma = 1
# Number of bins requested from the histogram call.
n_bins = 50
# Sample to plot, inlined as a plain list of 50 values.
s = [8, 8, 4, 4, 1, 14, 0, 10, 1, 4, 21, 9, 5, 2, 7, 6, 7, 9, 7, 3, 3, 4, 7, 9, 9, 4, 10, 8, 10, 10, 7, 10, 1, 8, 7, 8, 1, 7, 4, 15, 8, 1, 1, 6, 7, 3, 8, 8, 8, 4]
# Two stacked axes sharing the x-axis: axes[0] receives the boxplot,
# axes[1] the histogram and the density curve.
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)
# Histogram of s on the lower axes; the returned bin edges are reused below.
# NOTE(review): `normed` has been replaced by `density` in newer matplotlib
# releases -- confirm against the installed version.
n, bins, patches = axes[1].hist(s, n_bins, normed=True, alpha=.1, edgecolor='black' )
# Normal density evaluated at the bin edges, using the module-level mu and sigma.
pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
print(pdf)
# Median and lower/upper quartiles of the sample.
median, q1, q3 = np.percentile(s, 50), np.percentile(s, 25), np.percentile(s, 75)
# Draw the density curve over the histogram.
axes[1].plot(bins, pdf, color='orange', alpha=.6)
# Select the bin edges and pdf values for the two regions that were meant to be
# shaded with fill_between.
bins_1 = bins[(bins >= q1-1.5*(q3-q1)) & (bins <= q1)] # edges between Q1-1.5*IQR and Q1
bins_2 = bins[(bins <= q3+1.5*(q3-q1)) & (bins >= q3)] # edges between Q3 and Q3+1.5*IQR
# Split the pdf values into a first and a second half by index ...
pdf_1 = pdf[:int(len(pdf)/2)]
pdf_2 = pdf[int(len(pdf)/2):]
# ... then keep only the values lying between the densities at the region ends.
# NOTE(review): the bins are filtered by position and the pdf values by
# magnitude, so the two selections need not have equal lengths -- presumably
# why the fill_between calls below are commented out.
pdf_1 = pdf_1[(pdf_1 >= norm(mu,sigma).pdf(q1-1.5*(q3-q1))) & (pdf_1 <= norm(mu,sigma).pdf(q1))]
pdf_2 = pdf_2[(pdf_2 >= norm(mu,sigma).pdf(q3+1.5*(q3-q1))) & (pdf_2 <= norm(mu,sigma).pdf(q3))]
# Fill from Q1-1.5*IQR to Q1 and from Q3 to Q3+1.5*IQR (currently disabled).
#axes[1].fill_between(bins_1, pdf_1, 0, alpha=.6, color='orange')
#axes[1].fill_between(bins_2, pdf_2, 0, alpha=.6, color='orange')
# Percentage labels along the baseline of the lower axes, computed from the
# cdf of the normal distribution with the module-level mu and sigma.
axes[1].annotate("{:.1f}%".format(100*norm(mu, sigma).cdf(q1)), xy=((q1-1.5*(q3-q1)+q1)/2, 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3)-norm(mu, sigma).cdf(q1))), xy=(median, 0), ha='center')
# NOTE(review): the first cdf argument below subtracts q3 again, unlike the xy
# position on the same line -- this looks unintended.
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3+1.5*(q3-q1)-q3)-norm(mu, sigma).cdf(q3))), xy=((q3+1.5*(q3-q1)+q3)/2, 0), ha='center')
# Mark the quartiles at the height of the curve.
axes[1].annotate('q1', xy=(q1, norm(mu, sigma).pdf(q1)), ha='center')
axes[1].annotate('q3', xy=(q3, norm(mu, sigma).pdf(q3)), ha='center')
axes[1].set_ylabel('Probability Density')
# Horizontal boxplot on the upper axes with a thin line at the median.
# The positional arguments 0 and 'gD' are passed straight through to boxplot
# (presumably notch and flier symbol -- confirm against the boxplot signature).
axes[0].boxplot(s, 0, 'gD', vert=False)
axes[0].axvline(median, color='orange', alpha=.6, linewidth=.5)
axes[0].axis('off')
# The default figure size is changed only after the figure above was created.
plt.rcParams["figure.figsize"] = (10,10)
# Remove the vertical gap between the two axes, then display.
plt.subplots_adjust(hspace=0)
plt.show()

You have set mu and sigma arbitrarily to 0 and 1 respectively, but you should calculate them from your actual data:
# Take the curve parameters from the sample itself instead of fixed constants.
data = pd.Series(s)
mu, sigma = data.mean(), data.std()
Update with full working example:
import numpy as np
import scipy
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
# Sample to visualise and the number of histogram bins.
n_bins = 50
s = [8, 8, 4, 4, 1, 14, 0, 10, 1, 4, 21, 9, 5, 2, 7, 6, 7, 9, 7, 3, 3, 4, 7, 9, 9, 4, 10, 8, 10, 10, 7, 10, 1, 8, 7, 8, 1, 7, 4, 15, 8, 1, 1, 6, 7, 3, 8, 8, 8, 4]

# Two stacked axes sharing x: boxplot on top, histogram with curve below.
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)
top_ax, bottom_ax = axes

# Density-normalised histogram; the bin edges are reused for the curve.
n, bins, patches = bottom_ax.hist(s, n_bins, density=True, alpha=.1, edgecolor='black')

# Curve parameters come from the sample.
data = pd.Series(s)
mu, sigma = data.mean(), data.std()
fitted = norm(mu, sigma)


def _gaussian(x):
    """Normal density for the fitted mu and sigma, evaluated at x."""
    return 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(x-mu)**2/(2*sigma**2))


pdf = _gaussian(bins)
median = np.percentile(s, 50)
q1 = np.percentile(s, 25)
q3 = np.percentile(s, 75)

# Density curve over the histogram.
bottom_ax.plot(bins, pdf, color='orange', alpha=.6)

# Shade under the curve from Q1-1.5*IQR to Q1 and from Q3 to Q3+1.5*IQR.
iqr = 1.5 * (q3-q1)
for lo, hi in ((q1 - iqr, q1), (q3, q3 + iqr)):
    xs = np.linspace(lo, hi)
    bottom_ax.fill_between(xs, _gaussian(xs), 0, alpha=.6, color='orange')

# Percentage of the fitted distribution inside each interval, written on the baseline.
for lo, hi, x_text in ((q1 - iqr, q1, q1 - iqr/2),
                       (q1, q3, median),
                       (q3, q3 + iqr, q3 + iqr/2)):
    share = 100*(fitted.cdf(hi) - fitted.cdf(lo))
    bottom_ax.annotate("{:.1f}%".format(share), xy=(x_text, 0), ha='center')

# Quartile markers at the height of the curve.
for tag, q in (('q1', q1), ('q3', q3)):
    bottom_ax.annotate(tag, xy=(q, fitted.pdf(q)), ha='center')
bottom_ax.set_ylabel('Probability Density')

# Horizontal boxplot on the upper axes with a thin line at the median.
top_ax.boxplot(s, 0, 'gD', vert=False)
top_ax.axvline(median, color='orange', alpha=.6, linewidth=.5)
top_ax.axis('off')

Putting it all in a function:
# Bring in the helper that installs a warnings filter.
from warnings import simplefilter
# Install a filter that ignores every warning of category FutureWarning.
simplefilter(action='ignore', category=FutureWarning)
def CTD(df):
    """Plot a boxplot above a density histogram for every column of ``df``.

    For each column a new two-row figure (shared x-axis) is created:

    * bottom axes: a 50-bin density histogram, the normal curve whose mean
      and standard deviation are taken from the column, shaded regions from
      Q1-1.5*IQR to Q1 and from Q3 to Q3+1.5*IQR, the percentage of the
      fitted normal inside each interval, and vertical lines at the
      0/25/50/75/100 % quantiles;
    * top axes: a horizontal boxplot with a thin line at the median.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are plotted one by one. The columns are expected
        to be numeric.

    Returns
    -------
    None
        The figures are left open; call ``plt.show()`` to display them.
    """
    n_bins = 50
    # Set before any figure exists so that the first figure gets this size too.
    plt.rcParams["figure.figsize"] = (18, 10)

    # (quantile, colour, line style) of the vertical marker lines.
    quantile_lines = (
        (0, 'b', '-.'),
        (0.25, 'g', '--'),
        (0.50, 'g', '--'),
        (0.75, 'b', '--'),
        (1, 'r', '-.'),
    )

    for col in df.columns:
        # Always read from the frame that was passed in, not from a global.
        values = df[col]

        fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)
        box_ax, hist_ax = axes

        # histogram; only the bin edges are needed afterwards
        _, bins, _ = hist_ax.hist(values, n_bins, density=True, alpha=.1,
                                  edgecolor='black')

        # normal distribution with the column's own mean and std
        mu = values.mean()
        sigma = values.std()
        fitted = norm(mu, sigma)

        # statistics of *this* column (the median included)
        median, q1, q3 = np.percentile(values, [50, 25, 75])

        # probability density function
        hist_ax.plot(bins, fitted.pdf(bins), color='orange', alpha=.6)

        # fill from Q1-1.5*IQR to Q1 and Q3 to Q3+1.5*IQR
        whisker = 1.5 * (q3 - q1)
        for lo, hi in ((q1 - whisker, q1), (q3, q3 + whisker)):
            xs = np.linspace(lo, hi)
            hist_ax.fill_between(xs, fitted.pdf(xs), 0, alpha=.6,
                                 color='orange')

        # share of the fitted distribution inside each interval
        for lo, hi, x_text in ((q1 - whisker, q1, q1 - whisker / 2),
                               (q1, q3, median),
                               (q3, q3 + whisker, q3 + whisker / 2)):
            share = 100 * (fitted.cdf(hi) - fitted.cdf(lo))
            hist_ax.annotate("{:.1f}%".format(share), xy=(x_text, 0),
                             ha='center')
        for tag, q in (('q1', q1), ('q3', q3)):
            hist_ax.annotate(tag, xy=(q, fitted.pdf(q)), ha='center')

        # dashed lines at the quantiles, drawn explicitly on the histogram axes
        for q, colour, dash in quantile_lines:
            hist_ax.axvline(values.quantile(q), color=colour, linestyle=dash)
        hist_ax.set_ylabel('Probability Density')

        # top boxplot
        box_ax.boxplot(values, 0, 'gD', vert=False)
        box_ax.axvline(median, color='orange', alpha=.6, linewidth=.5)
        box_ax.axis('off')
Calling the function:
# Run CTD on `boston` (presumably a DataFrame loaded earlier -- not shown here).
CTD(boston)
If this doesn't work for you:
Try this:
# Bring in the helper that installs a warnings filter.
from warnings import simplefilter
# Install a filter that ignores every warning of category FutureWarning.
simplefilter(action='ignore', category=FutureWarning)
def CTD(df):
    """Draw two side-by-side distribution plots for each column of ``df``.

    A new figure is opened per column. The left panel shows the distribution
    with vertical lines at the mean (blue) and the median (red); the right
    panel shows the same distribution with vertical lines at the
    0, 0.25, 0.50, 0.75 and 1 quantiles.
    """
    # (quantile, colour, line style) for the markers of the right panel.
    quantile_marks = (
        (0, 'b', '-.'),
        (0.25, 'g', '--'),
        (0.50, 'g', '--'),
        (0.75, 'b', '--'),
        (1, 'r', '-.'),
    )

    for col in df.columns:
        series = df[col]
        sns.set(rc={'figure.figsize':(24,6)})
        plt.figure()

        # left panel: mean and median
        plt.subplot(121)
        sns.distplot(series)
        plt.axvline(np.mean(series), color='b', linestyle='--')
        plt.axvline(np.median(series), color='r', linestyle='--')

        # right panel: quantiles
        plt.subplot(122)
        sns.distplot(series)
        for q, colour, dash in quantile_marks:
            plt.axvline(series.quantile(q), color=colour, linestyle=dash)
This creates dashed lines at the quantiles on the KDE plot.

Related

Python - Bland-Altman Plot with Text Customization

I am trying to create a Bland-Altman plot with the text placed on the left side of the plot instead of on the right-hand side, which is the default.
This is my code
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Paired measurements to compare, one column per method.
measurements = {
    'A': [5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9,
          10, 11, 13, 14, 14, 15, 18, 22, 25],
    'B': [4, 4, 5, 5, 5, 7, 8, 6, 9, 7, 7, 11,
          13, 13, 12, 13, 14, 19, 19, 24],
}
df = pd.DataFrame(measurements)

# Bland-Altman plot drawn by statsmodels on an 8x5 inch axes.
f, ax = plt.subplots(1, figsize=(8, 5))
sm.graphics.mean_diff_plot(df.A, df.B, ax=ax)

# Display the figure.
plt.show()
So I want to have the "mean", the "SD+" and the "SD-" on the left side of the X-axis, not on the right.
thanks for your help or any suggestions!
I don't know, but I can use pyplot so:
# Bland-Altman plot drawn by hand so that the text labels can sit on the left.
# The x-axis of a Bland-Altman plot is the mean of the two measurements (this
# matches the "Means" axis label); the y-axis is their difference.
diffs = df.A - df.B
means = (df.A + df.B) / 2
mean_diff = diffs.mean()
diff_range = diffs.std()*1.96  # half-width of the 1.96*SD limits

# Horizontal extent of the reference lines and left-hand anchor for the text.
x_lo, x_hi = means.min() - 2, means.max() + 2
text_x = means.min() - 1
text_lift = .05*diff_range  # raises each label slightly above its line
upper, lower = mean_diff + diff_range, mean_diff - diff_range

plt.figure(figsize=(9, 6))
plt.scatter(means, diffs, alpha=.5)

# Solid line at the mean difference, labelled on the left.
plt.hlines(mean_diff, x_lo, x_hi, color="k", linewidth=1)
plt.text(text_x, mean_diff + text_lift, "mean diff: %.2f" % mean_diff,
         fontsize=13)

# Dashed lines at mean +/- 1.96*SD, labelled on the left.
plt.hlines([upper, lower], x_lo, x_hi, color="k", linewidth=1,
           linestyle="--")
plt.text(text_x, upper + text_lift, "+SD1.96: %.2f" % upper, fontsize=13)
plt.text(text_x, lower + text_lift, "-SD1.96: %.2f" % lower, fontsize=13)

plt.xlim(x_lo, x_hi)
plt.ylim(mean_diff - diff_range*1.5, mean_diff + diff_range*1.5)
plt.xlabel("Means", fontsize=15)
plt.ylabel("Difference", fontsize=15)
plt.show()
result:

How do I change the "str" labels in a function to "int" and return a plt.legend() that concatenates the two labels in an "int, str" format?

I have a function that allows me to display the circle of correlations of my pca.
The problem with this function is that the labels of my variables (column names) prevent me from reading my results correctly.
To overcome this problem, I have to insert a line of code before my function to associate numbers with the labels of the variables (of the df used to make my pca):
# One integer label per dataframe column, counting from 1.
n_labels = list(range(1, len(df.columns) + 1))
I tried unsuccessfully to insert this line in my function:
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import numpy as np
import pandas as pd
def display_circles(pcs,
                    n_comp,
                    pca,
                    axis_ranks,
                    labels=None,
                    label_rotation=0,
                    lims=None):
    """Draw one correlation circle per requested pair of components.

    Parameters
    ----------
    pcs : 2-D array indexed as ``pcs[component, variable]``.
    n_comp : pairs whose second rank is not below this value are skipped.
    pca : object providing ``explained_variance_ratio_`` for the axis titles.
    axis_ranks : iterable of ``(d1, d2)`` component index pairs.
    labels : optional sequence indexed by variable position; when given, the
        label of every variable lying inside the plot limits is drawn.
    label_rotation : rotation passed to the label text.
    lims : optional ``(xmin, xmax, ymin, ymax)`` overriding the default limits.

    Each pair gets its own 10x8 figure, which is shown immediately.
    """
    n_vars = pcs.shape[1]
    # below 30 variables arrows are drawn; otherwise plain faint segments
    few_vars = n_vars < 30

    def axis_caption(rank):
        # axis name together with the percentage of inertia it explains
        return 'F{} ({}%)'.format(
            rank + 1, round(100 * pca.explained_variance_ratio_[rank], 1))

    for d1, d2 in axis_ranks:
        if d2 >= n_comp:
            continue

        # figure initialization
        fig, ax = plt.subplots(figsize=(10, 8))
        xs, ys = pcs[d1, :], pcs[d2, :]

        # determination of graph limits
        if lims is not None:
            xmin, xmax, ymin, ymax = lims
        elif few_vars:
            xmin, xmax, ymin, ymax = -1, 1, -1, 1
        else:
            xmin, xmax = min(xs), max(xs)
            ymin, ymax = min(ys), max(ys)

        # one arrow (or segment) per variable, starting at the origin
        if few_vars:
            plt.quiver(np.zeros(n_vars),
                       np.zeros(n_vars),
                       xs,
                       ys,
                       angles='xy',
                       scale_units='xy',
                       scale=1,
                       color="grey")
        else:
            segments = [[[0, 0], [x, y]] for x, y in zip(xs, ys)]
            ax.add_collection(
                LineCollection(segments, axes=ax, alpha=.1, color='black'))

        # display of variable names, only for points inside the limits
        if labels is not None:
            for i, (x, y) in enumerate(zip(xs, ys)):
                if not (xmin <= x <= xmax and ymin <= y <= ymax):
                    continue
                plt.text(x,
                         y,
                         labels[i],
                         fontsize='22',
                         ha='center',
                         va='bottom',
                         rotation=label_rotation,
                         color="red",
                         alpha=0.7)

        # unit circle
        unit_circle = plt.Circle((0, 0), 1, facecolor='none', edgecolor='b')
        plt.gca().add_artist(unit_circle)

        # setting graph limits
        plt.xlim(xmin, xmax)
        plt.ylim(ymin, ymax)

        # dashed horizontal and vertical lines through the origin
        plt.plot([-1, 1], [0, 0], color='grey', ls='--')
        plt.plot([0, 0], [-1, 1], color='grey', ls='--')

        # axis names and title
        plt.xlabel(axis_caption(d1), fontsize=14)
        plt.ylabel(axis_caption(d2), fontsize=14)
        plt.title("Circle of correlations (F{} and F{})".format(
            d1 + 1, d2 + 1),
                  size=24)
        plt.show()
This is how I call my function:
import pandas as pd
from sklearn import decomposition, preprocessing
# Example dataset: eight columns of eight values each.
df = pd.DataFrame({
    'column_1': [1, 2, 3, 4, 5, 6, 7, 8],
    'column_2': [4, 2, 9, 23, 3, 52, 41, 4],
    'column_3': [9, 8, 7, 6, 6, 9, 24, 11],
    'column_4': [45, 36, 74, 35, 29, 45, 29, 39],
    'column_5': [35, 84, 3, 54, 68, 78, 65, 97],
    'column_6': [24, 96, 7, 54, 67, 69, 88, 95],
    'column_7': [5, 39, 72, 42, 22, 41, 24, 41],
    'column_8': [30, 98, 8, 67, 68, 41, 27, 87],
})

# Scale the data and fit a PCA with eight components on it.
pca_data = preprocessing.scale(df)
pca = decomposition.PCA(n_components=8)
pca.fit(pca_data)

# Number of components to display and the component matrix of the fit.
n_comp = 2
pcs = pca.components_

# One integer label per column, counting from 1.
n_labels = list(range(1, len(df.columns) + 1))
display_circles(pcs, n_comp, pca, [(0, 1), (0, 2)], labels=n_labels)

# Print the (number, column name) pairs as a key to the plot.
for number, name in zip(n_labels, df.columns):
    print((number, name))
Here is my obtained result:
Edit 1: what I would like (UPD: with the answer of @Stef — thank you very much and congratulations on this solution):
It's almost perfect, but problems appear when I use this function:
# Display up to three components, using the component matrix of the fitted pca.
n_comp = 3
pcs = pca.components_
# This line still has to be written by hand to obtain one integer label per
# column of list_candidates (presumably the asker's real dataframe).
n_labels=[value for value in range(1,(len(list_candidates.columns)+1))]
display_circles(pcs, n_comp, pca, [(0, 1), (0, 2)], labels=n_labels)
on my real dataframe, this throws me two problems:
I still have to include the line
n_labels=[value for value in range(1,(len(list_candidates.columns)+1))]
to obtain a label number instead of the name of my variables.
I get the error message "NameError: name 'df' is not defined" when running
display_circles(pcs, n_comp, pca, [(0, 1), (0, 2)], labels=n_labels)
So I'm looking to define my display_circles() function so that when I set the labels="name_of_the_df" argument it returns me the same result as
n_labels=[value for value in range(1,(len("name_of_the_df".columns)+1))]
plus a plt.legend() like the one made by @Stef (thanks)
To get this (desired) result:
I also have to modify "name_of_the_df" in the function definition:
# Legend pairing each integer label with a column name; integer handles are
# drawn by IntHandler and the box is anchored at axes position (1, 1).
# NOTE(review): this fragment reads `candidate_list`, whereas the call shown
# earlier uses `list_candidates` -- confirm which name is the real dataframe.
plt.legend(n_labels,
candidate_list.columns,
handler_map={int: IntHandler()},
bbox_to_anchor=(1, 1))
You can define your own legend handler for integers:
from matplotlib.text import Text
class IntHandler:
    """Legend handler that draws the string form of a handle as red text."""

    def legend_artist(self, legend, orig_handle, fontsize, handlebox):
        """Create the text artist for ``orig_handle`` inside ``handlebox``.

        The text is positioned at the handlebox's x/y descent, added to the
        handlebox and returned.
        """
        label = Text(handlebox.xdescent,
                     handlebox.ydescent,
                     str(orig_handle),
                     color='red')
        handlebox.add_artist(label)
        return label
and then call
# Legend mapping each integer label to its column name; int handles are rendered by IntHandler.
plt.legend(n_labels, df.columns, handler_map={int: IntHandler()}, bbox_to_anchor=(1,1))
before plt.show() in display_circles:
Full example as per comment below and edited question:
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib.text import Text
import numpy as np
import pandas as pd
from sklearn import decomposition, preprocessing
class IntHandler:
    """Legend handler that draws the string form of a handle as red text."""

    def legend_artist(self, legend, orig_handle, fontsize, handlebox):
        """Place ``str(orig_handle)`` in ``handlebox`` and return the artist."""
        anchor_x = handlebox.xdescent
        anchor_y = handlebox.ydescent
        number_text = Text(anchor_x, anchor_y, str(orig_handle), color='red')
        handlebox.add_artist(number_text)
        return number_text
def display_circles(pcs,
                    n_comp,
                    pca,
                    axis_ranks,
                    labels=None,
                    label_rotation=0,
                    lims=None):
    """Draw one correlation circle, with a numbered legend, per component pair.

    Parameters
    ----------
    pcs : 2-D array indexed as ``pcs[component, variable]``.
    n_comp : pairs whose second rank is not below this value are skipped.
    pca : fitted PCA object. ``explained_variance_ratio_`` is read for the
        axis titles and ``feature_names_in_`` for the legend entries, so the
        PCA must have been fitted on data carrying column names.
    axis_ranks : iterable of ``(d1, d2)`` component index pairs.
    labels : optional sequence indexed by variable position, drawn next to
        each variable and used as legend handles. Defaults to the integers
        ``1..n_features``.
    label_rotation : rotation passed to the label text.
    lims : optional ``(xmin, xmax, ymin, ymax)`` overriding the default limits.

    Each pair gets its own 10x8 figure, which is shown immediately.
    """
    # Identity test: `labels == None` would be evaluated element-wise (and
    # then fail in the `if`) when an array of labels is passed.
    if labels is None:
        labels = list(range(1, len(pca.feature_names_in_) + 1))

    n_vars = pcs.shape[1]
    # below 30 variables arrows are drawn; otherwise plain faint segments
    few_vars = n_vars < 30

    def axis_caption(rank):
        # axis name together with the percentage of inertia it explains
        return 'F{} ({}%)'.format(
            rank + 1, round(100 * pca.explained_variance_ratio_[rank], 1))

    for d1, d2 in axis_ranks:
        if d2 >= n_comp:
            continue

        # figure initialization
        fig, ax = plt.subplots(figsize=(10, 8))
        xs, ys = pcs[d1, :], pcs[d2, :]

        # determination of graph limits
        if lims is not None:
            xmin, xmax, ymin, ymax = lims
        elif few_vars:
            xmin, xmax, ymin, ymax = -1, 1, -1, 1
        else:
            xmin, xmax = min(xs), max(xs)
            ymin, ymax = min(ys), max(ys)

        # one arrow (or segment) per variable, starting at the origin
        if few_vars:
            plt.quiver(np.zeros(n_vars),
                       np.zeros(n_vars),
                       xs,
                       ys,
                       angles='xy',
                       scale_units='xy',
                       scale=1,
                       color="grey")
        else:
            segments = [[[0, 0], [x, y]] for x, y in zip(xs, ys)]
            ax.add_collection(
                LineCollection(segments, axes=ax, alpha=.1, color='black'))

        # variable labels, only for points inside the limits
        for i, (x, y) in enumerate(zip(xs, ys)):
            if xmin <= x <= xmax and ymin <= y <= ymax:
                plt.text(x,
                         y,
                         labels[i],
                         fontsize='22',
                         ha='center',
                         va='bottom',
                         rotation=label_rotation,
                         color="red",
                         alpha=0.7)

        # unit circle
        unit_circle = plt.Circle((0, 0), 1, facecolor='none', edgecolor='b')
        plt.gca().add_artist(unit_circle)

        # setting graph limits
        plt.xlim(xmin, xmax)
        plt.ylim(ymin, ymax)

        # dashed horizontal and vertical lines through the origin
        plt.plot([-1, 1], [0, 0], color='grey', ls='--')
        plt.plot([0, 0], [-1, 1], color='grey', ls='--')

        # axis names and title
        plt.xlabel(axis_caption(d1), fontsize=14)
        plt.ylabel(axis_caption(d2), fontsize=14)
        plt.title("Circle of correlations (F{} and F{})".format(
            d1 + 1, d2 + 1),
                  size=24)

        # legend pairing each label with its feature name; int handles are
        # rendered as red numbers by IntHandler
        plt.legend(labels,
                   pca.feature_names_in_,
                   handler_map={int: IntHandler()},
                   bbox_to_anchor=(1, 1))
        plt.show()
# Example dataset: eight columns of eight values each.
df = pd.DataFrame({
    'column_1': [1, 2, 3, 4, 5, 6, 7, 8],
    'column_2': [4, 2, 9, 23, 3, 52, 41, 4],
    'column_3': [9, 8, 7, 6, 6, 9, 24, 11],
    'column_4': [45, 36, 74, 35, 29, 45, 29, 39],
    'column_5': [35, 84, 3, 54, 68, 78, 65, 97],
    'column_6': [24, 96, 7, 54, 67, 69, 88, 95],
    'column_7': [5, 39, 72, 42, 22, 41, 24, 41],
    'column_8': [30, 98, 8, 67, 68, 41, 27, 87],
})

# Scale the data, then fit the PCA on a DataFrame that keeps the column names.
pca_data = preprocessing.scale(df)
scaled_frame = pd.DataFrame(pca_data, columns=df.columns)
pca = decomposition.PCA(n_components=8)
pca.fit(scaled_frame)

# Number of components to display and the component matrix of the fit.
n_comp = 2
pcs = pca.components_

# No labels argument: display_circles builds the numbering itself.
display_circles(pcs, n_comp, pca, [(0, 1), (0, 2)])

how to change the scale of the x-axis so that the orange and blue graphs are displayed correctly

I cannot display two of the graphs (the orange and the blue one) correctly. I have read the documentation and tried changing the values, but nothing works.
How can I show these graphs correctly together with the others, as in the first picture?
I get it like this
My code
My code:
import matplotlib.pyplot as plt
import numpy as np
# The four curves, in drawing order (the order fixes the colour of each one).
curves = [
    (np.array([0.264, 0.331, 0.397, 0.417, 0.438, 0.45, 0.459, 0.466]),
     np.array([0.01, 0.1, 1, 2, 4, 6, 8, 10])),
    (np.array([0.375, 0.435, 0.494, 0.512, 0.53, 0.541, 0.548, 0.564]),
     np.array([0.01, 0.1, 1, 2, 4, 6, 8, 10])),
    (np.array([-3.672, -3.733, -3.793, -3.811, -3.828, -3.839, -3.846, -3.852]),
     np.array([-0.01, -0.1, -1, -2, -4, -6, -8, -10])),
    (np.array([-14.86, -14.931, -14.998, -15.018, -15.039, -15.051, -15.06, -15.067]),
     np.array([-0.01, -0.1, -1, -2, -4, -6, -8, -10])),
]

# All curves go onto the same axes, each with round markers.
for xs, ys in curves:
    plt.plot(xs, ys, marker='o')

# Dashed black lines through x = 0 and y = 0, in data coordinates.
plt.axvline(0, c='black', ls='--')
plt.axhline(0, c='black', ls='--')

# Integer ticks from -15 to 1 on x and from -10 to 10 on y.
plt.xticks(list(range(-15, 2)))
plt.yticks(list(range(-10, 11)))

plt.show()
The result I want to get
enter image description here
The red and green curve use an x-range from -15 to -4, while the orange and blue curves have a very limited range between 0.25 and 0.55. Displaying them together on the same graph will necessarily lead to the compression you see.
You can draw two subplots, and move them close together while sharing the y-axis. That way, different x-axes can be used:
import matplotlib.pyplot as plt
import numpy as np
# Two axes side by side with no gap between them, sharing the y-axis.
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(15, 4), sharey=True,
                               gridspec_kw={'wspace': 0})

# Curves with positive x go on the right-hand axes.
right_curves = [
    (np.array([0.264, 0.331, 0.397, 0.417, 0.438, 0.45, 0.459, 0.466]),
     np.array([0.01, 0.1, 1, 2, 4, 6, 8, 10])),
    (np.array([0.375, 0.435, 0.494, 0.512, 0.53, 0.541, 0.548, 0.564]),
     np.array([0.01, 0.1, 1, 2, 4, 6, 8, 10])),
]
# Curves with negative x go on the left-hand axes.
left_curves = [
    (np.array([-3.672, -3.733, -3.793, -3.811, -3.828, -3.839, -3.846, -3.852]),
     np.array([-0.01, -0.1, -1, -2, -4, -6, -8, -10])),
    (np.array([-14.86, -14.931, -14.998, -15.018, -15.039, -15.051, -15.06, -15.067]),
     np.array([-0.01, -0.1, -1, -2, -4, -6, -8, -10])),
]

for xs, ys in right_curves:
    ax2.plot(xs, ys, marker='o')

# Two empty plots first, to move the colour cycle of the left axes past the
# colours already used on the right.
for _ in range(2):
    ax1.plot([], [])
for xs, ys in left_curves:
    ax1.plot(xs, ys, marker='o')

# Dashed horizontal zero line on both axes (the vertical zero line of the
# question is not drawn here).
for axis in (ax1, ax2):
    axis.axhline(0, c='black', ls='--')

# set ticks and data limits
ax1.set_xticks(range(-15, 0))
ax1.set_xlim(xmax=0)
ax1.set_yticks(range(-10, 11))
ax2.set_xlim(xmin=0)

plt.tight_layout()
plt.show()

How can I remove certain grid line but keep this tick in matplotlib

My matplotlib code is like this:
if __name__ == '__main__':
    import matplotlib.pyplot as plt
    from matplotlib.pyplot import MultipleLocator

    # Data points of the red line.
    x = [50, 100, 200, 300, 400, 500, 600, 700, 800, 900]
    y = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    f, ax = plt.subplots(figsize=(10, 7), dpi=200)
    # One major tick every 50 units along x.
    ax.xaxis.set_major_locator(MultipleLocator(50))
    ax.plot(x, y, 'o-', color="r", lw=4, ms=10)
    # Grid lines at every major tick, then save the figure to disk.
    ax.grid()
    f.savefig("question.png")
The figure it draws is like this
How can I remove the grid line in the 50 of the x axis, but keep the 50 tick in the graph?
Try using plt.axvline:
if __name__ == '__main__':
    import matplotlib.pyplot as plt
    from matplotlib.pyplot import MultipleLocator

    # Data points of the red line.
    x = [50, 100, 200, 300, 400, 500, 600, 700, 800, 900]
    y = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    f, ax = plt.subplots(figsize=(10, 7), dpi=200)
    # One major tick every 50 units along x.
    ax.xaxis.set_major_locator(MultipleLocator(50))
    ax.plot(x, y, 'o-', color="r", lw=4, ms=10)

    # Instead of a grid, draw a thin grey vertical line at every x tick
    # position except 50.
    tick_positions = plt.xticks()[0]
    for loc in tick_positions:
        if loc == 50:
            continue
        plt.axvline(x=loc, color='grey', linestyle='-', linewidth=0.4)
    plt.show()
Output:

bar graph with wrong width

I want to create a bar graph for a dataframe that contains multiple categories, with a different color for each category. Below is my simplified code and the resulting graph. The top subplot is a regular bar graph in one color; the bottom subplot is color coded, but the bar width is messed up. Any suggestions? Thanks!
import random
import pandas as pd
import matplotlib.pyplot as plt
# Example frame: a category id, an x position and a random bar height per row.
df = pd.DataFrame({
    'Cat': [1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4],
    'A': [2, 3, 6, 7, 9, 10, 15, 18, 22, 23, 24, 25],
    'B': random.sample(range(1, 20), 12),
})
fig = plt.figure(figsize=(15, 15/2.3))

# Top subplot: all bars in a single call, with the default width.
ax = plt.subplot(2, 1, 1)
plt.bar(df.A, df.B)
plt.xlim(0, 30)

# Bottom subplot: one bar call per category, each with width 0.5.
ax = plt.subplot(2, 1, 2)
for cat in df.Cat.unique():
    df_ = df[df.Cat == cat]
    plt.bar(df_.A, df_.B, width=0.5)
plt.xlim(0, 30)
plt.show()

Categories