Cut off text that cut the y-axis - python

I have a horizontal bar chart. I want the label in the bar, but if it crosses the y-axis it needs to be clipped.
My code:
import matplotlib.pyplot as plt
labels = ['Book 1', 'Book 2', 'Book 3']
values = [20, 35, 5]
fig, ax = plt.subplots()
y_pos = range(len(labels))
ax.barh(y_pos, values, align='center', color='blue')
for i, v in enumerate(values):
ax.text(v, i, "TESTSTETSTSTS", color='grey', va='center', ha='right')
ax.set_yticks(y_pos)
ax.set_yticklabels(labels)
ax.invert_yaxis()
ax.set_xlabel('Number')
ax.set_title('Number of sold books')
plt.show()
Wrong result:
Right result:

Here is the fixed code.
CODE
import matplotlib.pyplot as plt
labels = ['Book 1', 'Book 2', 'Book 3']
values = [20, 35, 5]
fig, ax = plt.subplots()
y_pos = range(len(labels))
ax.barh(y_pos, values, align='center', color='blue')
for i, v in enumerate(values):
ax.text(v, i, "TESTSTETSTSTS", color='grey', va='center', ha='right', clip_on=True) #HERE IS THE FIX
ax.set_yticks(y_pos)
ax.set_yticklabels(labels)
ax.invert_yaxis()
ax.set_xlabel('Number')
ax.set_title('Number of sold books')
plt.show()
evidence

Related

Two different graph ticks parameters on Y axes from one table

Considering below table:
country
points
price
England
91.550725
51.681159
India
90.222222
13.333333
Austria
90.190782
30.762772
Germany
89.836321
42.257547
Canada
89.377953
35.712598
d = {'points': [91.5, 90.2, 90.1, 89.8, 89.3],
'price': [51.6, 13.3,30.7, 42.2, 35.7]}
index=['England', 'India','Austria', 'Germany','Canada']
df = pd.DataFrame(index=index,data=d)
fig, ax1 = plt.subplots(figsize = (10,5))
color = 'tab:purple'
ax1.set_xlabel('Country', fontsize=12)
ax1.set_ylabel('Average Ratings', color=color, fontsize=12)
sns.barplot(x=df['points'],y=df.index, color=color)
ax1.tick_params(axis='y', labelcolor=color, labelsize = 12)
ax2 = ax1.twinx()
plt.xlim(12, 92)
color = 'tab:red'
ax2.set_ylabel('Price', color=color, fontsize=12)
sns.barplot(x=df['price'],y=df.index,color=color)
ax2.tick_params(axis='y', labelcolor=color, labelsize = 12)
My question: How can I modify the right side Y axis ticks parameters to price (red), so that it represents the numbers of price column as well as the title.
Pandas: 1.2.4
Seaborn: 0.11.1
Matplotlib: 3.3.4
I assume this comes close to what you want:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
d = {'points': [91.5, 90.2, 90.1, 89.8, 89.3],
'price': [51.6, 13.3,30.7, 42.2, 35.7]}
index=['England', 'India','Austria', 'Germany','Canada']
df = pd.DataFrame(index=index,data=d)
fig, ax1 = plt.subplots(figsize = (10,5))
color = 'tab:purple'
#ax1.set_xlabel('Country', fontsize=12) <-- not necessary for your output
ax1.set_ylabel('Average country rating (in points)', color=color, fontsize=12) #mention unit for rating
sns.barplot(x=df['points'],y=df.index, color=color)
ax1.tick_params(axis='y', labelcolor=color, labelsize = 12)
ax2 = ax1.twinx()
plt.xlim(12, 92)
color = 'tab:red'
ax2.set_ylabel('Price (in $)', color=color, fontsize=12) #mention unit for price
sns.barplot(x=df['price'],y=df.index,color=color)
ax2.tick_params(axis='y', labelcolor=color, labelsize = 12)
ax2.set_yticklabels(df['price']) #relabel right axis with price values
ax1.set_xlabel("") #remove x-label because this axis applies to both categories
plt.show()
Sample output:
However, I hope you take the point into account that Trenton mentioned in a comment (now deleted). This graph is indeed rather difficult to read. The values on the left have their labels on the right, and vice versa.

Errorbar in Legend - Pandas Bar Plot

Is it possible to show the error bars in the legend?
(Like i draw in red)
They do not necessarily have to be the correct length, it is enough for me if they are indicated and recognizable.
My working sample:
import pandas as pd
import matplotlib.pyplot as plt
test = pd.DataFrame(data={'one':2000,'two':300,'three':50,'four':150}, index=['MAX'])
fig, ax = plt.subplots(figsize=(5, 3), dpi=230)
ax.set_ylim(-.12,.03)
# barplot
ax = test.loc[['MAX'],['one']].plot(position=5.5,color=['xkcd:camo green'], xerr=test.loc[['MAX'],['two']].values.T, edgecolor='black',linewidth = 0.3, error_kw=dict(lw=1, capsize=2, capthick=1),ax=ax,kind='barh',width=.025)
ax = test.loc[['MAX'],['one']].plot(position=7,color=['xkcd:moss green'], xerr=test.loc[['MAX'],['three']].values.T, edgecolor='black',linewidth = 0.3, error_kw=dict(lw=1, capsize=2, capthick=1),ax=ax,kind='barh',width=.025)
ax = test.loc[['MAX'],['one']].plot(position=8.5,color=['xkcd:light olive green'],xerr=test.loc[['MAX'],['four']].values.T, edgecolor='black',linewidth = 0.3, error_kw=dict(lw=1, capsize=2, capthick=1),ax=ax,kind='barh',width=.025)
# Legende
h0, l0 = ax.get_legend_handles_labels()
l0 = [r'MAX $1$', r'MAX $2$', r'MAX $3$']
legend = plt.legend(h0, l0, borderpad=0.15,labelspacing=0.1, frameon=True, edgecolor="xkcd:black", ncol=1, loc='upper left',framealpha=1, facecolor='white')
legend.get_frame().set_linewidth(0.3)
cur_axes = plt.gca()
cur_axes.axes.get_yaxis().set_ticklabels([])
cur_axes.axes.get_yaxis().set_ticks([])
plt.show()
I tried a few ways, no one works.
With Patch in legend_elements i get no lines for the errorbars, with the errorbar() function i can draw a figure with errorbars, but it semms not to work in the legend:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
legend_elements = [
Line2D([1,2], [5,4], color='b', lw=1, label='Line'),
Patch(facecolor='orange', edgecolor='r', label='Color Patch'),
matplotlib.pyplot.errorbar(3, 3, yerr=None, xerr=1, marker='s',mfc='xkcd:camo green', mec='black',
ms=20, mew=2, fmt='-', ecolor="black", elinewidth=2, capsize=3,
barsabove=True, lolims=False, uplims=False, xlolims=False, xuplims=False,
errorevery=2, capthick=None, label="error"),
]
test = pd.DataFrame(data={'one':2000,'two':300,'three':50,'four':150}, index=['MAX'])
fig, ax = plt.subplots(figsize=(5, 3), dpi=230)
ax.set_ylim(-.12,.03)
# barplot
ax = test.loc[['MAX'],['one']].plot(position=5.5,color=['xkcd:camo green'], xerr=test.loc[['MAX'],['two']].values.T, edgecolor='black',linewidth = 0.3, error_kw=dict(lw=1, capsize=2, capthick=1),ax=ax,kind='barh',width=.025)
ax = test.loc[['MAX'],['one']].plot(position=7,color=['xkcd:moss green'], xerr=test.loc[['MAX'],['three']].values.T, edgecolor='black',linewidth = 0.3, error_kw=dict(lw=1, capsize=2, capthick=1),ax=ax,kind='barh',width=.025)
ax = test.loc[['MAX'],['one']].plot(position=8.5,color=['xkcd:light olive green'],xerr=test.loc[['MAX'],['four']].values.T, edgecolor='black',linewidth = 0.3, error_kw=dict(lw=1, capsize=2, capthick=1),ax=ax,kind='barh',width=.025)
# Legende
h0, l0 = ax.get_legend_handles_labels()
l0 = [r'MAX $1$', r'MAX $2$', r'MAX $3$']
legend = plt.legend(h0, l0, borderpad=0.15,labelspacing=0.1, frameon=True, edgecolor="xkcd:black", ncol=1, loc='upper left',framealpha=1, facecolor='white')
legend.get_frame().set_linewidth(0.3)
ax.legend(handles=legend_elements, loc='center')
cur_axes = plt.gca()
cur_axes.axes.get_yaxis().set_ticklabels([])
cur_axes.axes.get_yaxis().set_ticks([])
#plt.show()
Implementation based on the idea of
r-beginners:
import pandas as pd
import matplotlib.pyplot as plt
test = pd.DataFrame(data={'one':2000,'two':300,'three':50,'four':150}, index=['MAX'])
fig, ax = plt.subplots(figsize=(5, 3), dpi=150)
ax.set_ylim(0, 6)
ax.set_xlim(0, 2400)
ax1 = ax.twiny()
ax1.set_xlim(0, 2400)
ax1.set_xticks([])
ax.barh(1, width=test['one'], color=['xkcd:camo green'], edgecolor='black',linewidth = 0.3, label='MAX1')
ax.barh(2, width=test['one'], color=['xkcd:moss green'], edgecolor='black',linewidth = 0.3, label='MAX2')
ax.barh(3, width=test['one'], color=['xkcd:light olive green'], edgecolor='black',linewidth = 0.3, label='MAX3')
ax1.errorbar(test['one'], 1, xerr=test['two'], color='k', ecolor='k', fmt=',', lw=1, capsize=2, capthick=1, label='MAX1')
ax1.errorbar(test['one'], 2, xerr=test['three'], color='k', ecolor='k', fmt=',', lw=1, capsize=2, capthick=1, label='MAX2')
ax1.errorbar(test['one'], 3, xerr=test['four'], color='k', ecolor='k', fmt=',', lw=1, capsize=2, capthick=1, label='MAX3')
handler, label = ax.get_legend_handles_labels()
handler1, label1 = ax1.get_legend_handles_labels()
label1 = ['' for l in label1]
ax.legend(handler, label, loc='upper left', handletextpad=1.5)
ax1.legend(handler1, label1, loc='upper left', handletextpad=1., markerfirst=False, framealpha=0.001)
plt.show()
Changes:
ax1 gets the same limit as ax
all strings from label1 are deleted
in ax1.legend() the order of handler and label is exchanged and with the handlertextpad the error bars are shifted to the right
The method I came up with was to draw 'ax.barh' and 'ax1.errorbar()' and then superimpose the legends of each on top of each other. On one side, I minimized the transparency so that the legend below is visible; the error bar looks different because I made it biaxial.
import pandas as pd
import matplotlib.pyplot as plt
test = pd.DataFrame(data={'one':2000,'two':300,'three':50,'four':150}, index=['MAX'])
fig, ax = plt.subplots(figsize=(5, 3), dpi=230)
ax.set_ylim(0, 15)
ax.set_xlim(0, 2400)
ax1 = ax.twiny()
ax.barh(5.5, width=test['one'], color=['xkcd:camo green'], edgecolor='black',linewidth = 0.3, label='MAX1')
ax.barh(7.0, width=test['one'], color=['xkcd:moss green'], edgecolor='black',linewidth = 0.3, label='MAX2')
ax.barh(8.5, width=test['one'], color=['xkcd:light olive green'], edgecolor='black',linewidth = 0.3, label='MAX3')
ax1.errorbar(test['one'], 5.5, xerr=test['two'], color='k', ecolor='k', capsize=3, fmt='|', label='MAX1')
ax1.errorbar(test['one'], 7.0, xerr=test['three'], color='k', ecolor='k', capsize=3, fmt='|', label='MAX2')
ax1.errorbar(test['one'], 8.5, xerr=test['four'], color='k', ecolor='k', capsize=3, fmt='|', label='MAX3')
handler, label = ax.get_legend_handles_labels()
handler1, label1 = ax1.get_legend_handles_labels()
ax.legend(handler, label, loc='upper left', title='mix legend')
ax1.legend(handler1, label1, loc='upper left', title='mix legend', framealpha=0.001)
plt.show()
You can add lines manually on the chart, adjusting the color, thickness and position you prefer. It is a very manual and laborious solution, but it should work.
# Draw line
import matplotlib.lines as ln
import numpy as np
# new clear axis overlay with 0-1 limits
ax2 = plt.axes([0,0,1,1], facecolor=(1,1,1,0))
x1,y1 = np.array([[0.18, 0.21], [0.831, 0.831]])
line1 = ln.Line2D(x1, y1, lw=1, color='black', alpha=1)
x2,y2 = np.array([[0.18, 0.21], [0.783, 0.783]])
line2 = ln.Line2D(x2, y2, lw=1, color='black', alpha=1)
x3,y3 = np.array([[0.18, 0.21], [0.732, 0.732]])
line3 = ln.Line2D(x3, y3, lw=1, color='black', alpha=1)
ax2.add_line(line1)
ax2.add_line(line2)
ax2.add_line(line3)
plt.show()

Create separate distplot from countplot

How can I create distplot from countplot
plt.rcdefaults()
%config InlineBackend.figure_format='retina'
sns.set_style('darkgrid')
ax = sns.countplot(x='Age',hue='Gender',data=df,edgecolor="None")
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width()/2.
y = rect.get_height()
try:
ax.annotate("{}".format(int(y)), (x,y), ha='center', va='bottom', clip_on=True)
except:
pass
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato',weight='bold')
plt.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.savefig('files\\Countplot_for_Age(Gender).jpg')
I want distplot for 2 Genders either in same plot or separately
Any suggestions or help will be highly appreciable
The x-axis of a countplot is categorical: it puts one bar for each encountered age, skipping bars when there are no rows for a certain age (21 and 23 in the example). Internally the bars are numbered as 0, 1, 2, ...
The y-axis is the count, which is proportional to the number of rows.
For a distplot, the x-axis are the ages themselves, and the y-axis is a probability distribution, which usually are quite small numbers (the area under the curve is normalized to be 1).
So, as both the x-axis and the y-axis are different, it is better to use separate subplots.
A distplot can be generated directly from the given data. Passing the same ax results in two distplots in the same subplot. A distplot is a combination of a histogram and a kdeplot. If the histogram isn't needed, hist=False leaves
it out, or the kdeplot can be called directly. The shade=True option adds shading to the plot.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
NF = 50
NM = 10
df = pd.DataFrame({'Age': np.concatenate([np.random.randint(13, 20, NF) + np.random.randint(2, 7, NF),
np.random.randint(15, 23, NM)]),
'Gender': np.repeat(['female', 'male'], (NF, NM))})
df['Age'] = df['Age'].where((df['Age'] != 21) & (df['Age'] != 23), 20)
sns.set_style('darkgrid')
fig, axs = plt.subplots(ncols=2, figsize=(12, 4))
ax = sns.countplot(x='Age', hue='Gender', data=df, edgecolor="None", ax=axs[0])
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width() / 2.
y = rect.get_height()
ax.annotate(f"{y:.0f}", (x, y), ha='center', va='bottom', clip_on=True)
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato', weight='bold')
ax.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
for gender in ('female', 'male'):
# ax2 = sns.kdeplot(df[df['Gender'] == gender]['Age'], shade=True, ax=axs[1], label=gender)
ax2 = sns.distplot(df[df['Gender'] == gender]['Age'], hist=False, kde_kws={'shade': True}, ax=axs[1], label=gender)
ax2.set_axisbelow(True)
ax2.set_xlabel('Age', color='green')
ax2.set_ylabel('probability distribution', color='green')
ax2.set_title('Distplot for Age(Gender)', color='tomato', weight='bold')
ax2.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.show()

Connecting jittered data points with lines - seaborn python

Is there a way to extract the x-axis values of the jittered points in the middle plot as generated by the code below?
See the issue here
# import libraries
import seaborn as sns
import matplotlib.pyplot as plt
# Create plot
ax2 = fig.add_subplot(132)
sns.stripplot(x="variable", y="value", data=pupil_long_df, dodge=True, jitter=True, alpha=.40, zorder=1, size=8, linewidth = 1)
sns.pointplot(x='variable', y='value', ci=95,data=pupil_long_df, join=False, scale=1, zorder=100, color='black', capsize = 0.05, palette = 'Paired')
# Add lines between the points
lines3 = plt.plot([df.iloc[:,0], df.iloc[:,1]], color = 'grey', linewidth = 0.5, linestyle = '--')
I think it would be terribly impractical to extract the x-values of the stripplot... My standard advice is that if you want to do something more than the standard plots offered by seaborn, then it's usually easier to just recreate them by hand. See the code below:
N=20
# dummy dataset
data = np.random.normal(size=(N,))
df = pd.DataFrame({'condition 1': data,
'condition 2': data+1,
'condition 3': data,
'condition 4': data-1})
jitter = 0.05
df_x_jitter = pd.DataFrame(np.random.normal(loc=0, scale=jitter, size=df.values.shape), columns=df.columns)
df_x_jitter += np.arange(len(df.columns))
fig, ax = plt.subplots()
for col in df:
ax.plot(df_x_jitter[col], df[col], 'o', alpha=.40, zorder=1, ms=8, mew=1)
ax.set_xticks(range(len(df.columns)))
ax.set_xticklabels(df.columns)
ax.set_xlim(-0.5,len(df.columns)-0.5)
for idx in df.index:
ax.plot(df_x_jitter.loc[idx,['condition 1','condition 2']], df.loc[idx,['condition 1','condition 2']], color = 'grey', linewidth = 0.5, linestyle = '--', zorder=-1)
ax.plot(df_x_jitter.loc[idx,['condition 3','condition 4']], df.loc[idx,['condition 3','condition 4']], color = 'grey', linewidth = 0.5, linestyle = '--', zorder=-1)

Average line for bar chart in matplotlib

How do we draw an average line (horizontal) for a histogram in using matplotlib?
Right now, I'm able to draw the histogram without any issues.
Here is the code I'm using:
## necessary variables
ind = np.arange(N) # the x locations for the groups
width = 0.2 # the width of the bars
plt.tick_params(axis='both', which='major', labelsize=30)
plt.tick_params(axis='both', which='minor', labelsize=30)
ax2 = ax.twinx()
## the bars
rects1 = ax.bar(ind, PAAE1, width,
color='0.2',
error_kw=dict(elinewidth=2,ecolor='red'),
label='PAAE1')
rects2 = ax.bar(ind+width, PAAE2, width,
color='0.3',
error_kw=dict(elinewidth=2,ecolor='black'),
label='PAAE2')
rects3 = ax2.bar(ind+width+width, AAE1, width,
color='0.4',
error_kw=dict(elinewidth=2,ecolor='red'),
label='AAE1')
rects4 = ax2.bar(ind+3*width, AAE2, width,
color='0.5',
error_kw=dict(elinewidth=2,ecolor='black'),
label='AAE3')
maxi = max(dataset[2])
maxi1 = max(dataset[4])
f_max = max(maxi, maxi1)
lns = [rects1,rects2,rects3,rects4]
labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc='upper center', ncol=4)
# axes and labels
ax.set_xlim(-width,len(ind)+width)
ax.set_ylim(0, 100)
ax.set_ylabel('PAAE', fontsize=25)
ax2.set_ylim(0, f_max+500)
ax2.set_ylabel('AAE (mW)', fontsize=25)
xTickMarks = dataset[0]
ax.set_xticks(ind+width)
xtickNames = ax.set_xticklabels(xTickMarks)
plt.setp(xtickNames, rotation=90, fontsize=25)
I want to plot the average line for PAAE 1, 2 and AAE 1, 2.
What should I be using to plot the average line?
If you'd like a vertical line to denote the mean use axvline(x_value). This will place a vertical line that always spans the full (or specified fraction of) y-axis. There's also axhline for horizontal lines.
In other works, you might have something like this:
ax.axvline(data1.mean(), color='blue', linewidth=2)
ax.axvline(data2.mean(), color='green', linewidth=2)
As a more complete, but unnecessarily complex example (most of this is nicely annotating the means with curved arrows):
import numpy as np
import matplotlib.pyplot as plt
data1 = np.random.normal(0, 1, 1000)
data2 = np.random.normal(-2, 1.5, 1000)
fig, ax = plt.subplots()
bins = np.linspace(-10, 5, 50)
ax.hist(data1, bins=bins, color='blue', label='Dataset 1',
alpha=0.5, histtype='stepfilled')
ax.hist(data2, bins=bins, color='green', label='Dataset 2',
alpha=0.5, histtype='stepfilled')
ax.axvline(data1.mean(), color='blue', linewidth=2)
ax.axvline(data2.mean(), color='green', linewidth=2)
# Add arrows annotating the means:
for dat, xoff in zip([data1, data2], [15, -15]):
x0 = dat.mean()
align = 'left' if xoff > 0 else 'right'
ax.annotate('Mean: {:0.2f}'.format(x0), xy=(x0, 1), xytext=(xoff, 15),
xycoords=('data', 'axes fraction'), textcoords='offset points',
horizontalalignment=align, verticalalignment='center',
arrowprops=dict(arrowstyle='-|>', fc='black', shrinkA=0, shrinkB=0,
connectionstyle='angle,angleA=0,angleB=90,rad=10'),
)
ax.legend(loc='upper left')
ax.margins(0.05)
plt.show()

Categories