Related
I want to make boxplots with hues but I want to color code it so that each specific X string is a certain color with the hue just being a lighter color. I am able to do a boxplot without a hue. When I incorporate the hue, I get the second boxplot which loses the colors. Can someone help me customize the colors for the figure that contains the hue?
Essentially, its what the answer for this question is but with boxplots.
This is my code:
first boxplot
order=['Ash1','E1A','FUS','p53']
colors=['gold','teal','darkorange','royalblue']
color_dict=dict(zip(order,colors))
fig,ax=plt.subplots(figsize=(25,15))
bp=sns.boxplot(data=df_idrs, x=df_idrs["construct"], y=df_idrs['Norm_Ef_IDR/Ef_GS'],ax=ax,palette=color_dict)
sns.stripplot(ax=ax,y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs,palette=color_dict,
jitter=1, marker='o', alpha=0.4,edgecolor='black',linewidth=1, dodge=True)
ax.axhline(y=1,linestyle="--",color='black',linewidth=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
second boxplot
order=['Ash1','E1A','FUS','p53']
colors=['gold','teal','darkorange','royalblue']
color_dict=dict(zip(order,colors))
fig,ax=plt.subplots(figsize=(25,15))
bp=sns.boxplot(data=df_idrs, x=df_idrs["construct"], y=df_idrs['Norm_Ef_IDR/Ef_GS'],ax=ax, hue=df_idrs["location"])
sns.stripplot(y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs, hue=df_idrs["location"],
jitter=1, marker='o', alpha=0.4,edgecolor='black',linewidth=1, dodge=True)
ax.axhline(y=1,linestyle="--",color='black',linewidth=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
The only thing that changed was the palette to hue. I have seen many examples on here but I am unable to get them to work. Using the second code, I have tried the following:
Nothing happens for this one.
for ind, bp in enumerate(ax.findobj(PolyCollection)):
rgb = to_rgb(colors[ind // 2])
if ind % 2 != 0:
rgb = 0.5 + 0.5 * np.array(rgb) # make whiter
bp.set_facecolor(rgb)
I get index out of range for the following one.
for i in range(0,4):
mybox = bp.artists[i]
mybox.set_facecolor(color_dict[order[i]])
Matplotlib stores the boxes in ax.patches, but there are also 2 dummy patches (used to construct the legend) that need to be filtered away. The dots of the stripplot are stored in ax.collections. There are also 2 dummy collections for the legend, but as those come at the end, they don't form a problem.
Some remarks:
sns.boxplot returns the subplot on which it was drawn; as it is called with ax=ax it will return that same ax
Setting jitter=1in the stripplot will smear the dots over a width of 1. 1 is the distance between the x positions, and the boxes are only 0.4 wide. To avoid clutter, the code below uses jitter=0.4.
Here is some example code starting from dummy test data:
from matplotlib import pyplot as plt
from matplotlib.legend_handler import HandlerTuple
from matplotlib.patches import PathPatch
from matplotlib.colors import to_rgb
import seaborn as sns
import pandas as pd
import numpy as np
np.random.seed(20230215)
order = ['Ash1', 'E1A', 'FUS', 'p53']
colors = ['gold', 'teal', 'darkorange', 'royalblue']
hue_order = ['A', 'B']
df_idrs = pd.DataFrame({'construct': np.repeat(order, 200),
'Norm_Ef_IDR/Ef_GS': (np.random.normal(0.03, 1, 800).cumsum() + 10) / 15,
'location': np.tile(np.repeat(hue_order, 100), 4)})
fig, ax = plt.subplots(figsize=(12, 5))
sns.boxplot(data=df_idrs, x=df_idrs['construct'], y=df_idrs['Norm_Ef_IDR/Ef_GS'], hue='location',
order=order, hue_order=hue_order, ax=ax)
box_colors = [f + (1 - f) * np.array(to_rgb(c)) # whiten colors depending on hue
for c in colors for f in np.linspace(0, 0.5, len(hue_order))]
box_patches = [p for p in ax.patches if isinstance(p, PathPatch)]
for patch, color in zip(box_patches, box_colors):
patch.set_facecolor(color)
sns.stripplot(y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs, hue=df_idrs['location'],
jitter=0.4, marker='o', alpha=0.4, edgecolor='black', linewidth=1, dodge=True, ax=ax)
for collection, color in zip(ax.collections, box_colors):
collection.set_facecolor(color)
ax.axhline(y=1, linestyle='--', color='black', linewidth=2)
handles = [tuple(box_patches[i::len(hue_order)]) for i in range(len(hue_order))]
ax.legend(handles=handles, labels=hue_order, title='hue category',
handlelength=4, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
loc='upper left', bbox_to_anchor=(1.01, 1))
plt.tight_layout()
plt.show()
I have a horizontal barplot, for example, a simplified version of the example from the seaborn documentation:
import seaborn as sns
import matplotlib.pyplot as plt
f, ax = plt.subplots(figsize=(6, 15))
crashes = sns.load_dataset("car_crashes").sort_values("total", ascending=False)
sns.barplot(x="total", y="abbrev", data=crashes,
label="Total", color="b")
ax.set(xlim=(0, 24), ylabel="",
xlabel="Automobile collisions per billion miles")
plt.show()
How can I get the bars labeled with the value for each bar?
I tried this approach for vertical bars (How to add percentages on top of bars in seaborn), but it doesn't seem to work. Changing height to width doesn't have the effect I assumed it would.
for p in ax.patches:
height = p.get_width()
ax.text(p.get_y()+p.get_height()/2.,
height + 3,
'{:1.2f}'.format(height),
ha="center")
I'm assuming the horizontal plot works differently?
Got it, thanks to #ImportanceOfBeingErnest
This worked for me
for p in ax.patches:
width = p.get_width() # get bar length
ax.text(width + 1, # set the text at 1 unit right of the bar
p.get_y() + p.get_height() / 2, # get Y coordinate + X coordinate / 2
'{:1.2f}'.format(width), # set variable to display, 2 decimals
ha = 'left', # horizontal alignment
va = 'center') # vertical alignment
As of matplotlib 3.4.0
Use the new built-in ax.bar_label, which will automatically label bar containers regardless of orientation:
fig, ax = plt.subplots(figsize=(6, 8))
sns.barplot(x="total", y="abbrev", data=crashes)
# new helper method to auto-label bars
ax.bar_label(ax.containers[0])
If the bars are grouped by hue, call ax.bar_label on all the containers:
fig, ax = plt.subplots(figsize=(5, 6))
ax = sns.barplot(x="tip", y="day", hue="smoker", data=tips)
# grouped bars will have multiple containers
for container in ax.containers:
ax.bar_label(container)
Thank you very much for this. It helped me a lot, but i ran to a problem, where percents had to many digits after decimal point, the format can be then simply specified:
for container in ax.containers:
ax.bar_label(container,size=8,fmt='%.1f')
I have this sample data:
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import seaborn as sns
import pandas as pd
df = pd.DataFrame({'AAAAAAAAAAAAAAAAAAAA': np.random.choice([False,True], 100000),
'BBBBBBBBBBBBBBBBBBBB': np.random.choice([False,True], 100000),
'CCCCCCCCCCCCCCCCCCCC': np.random.choice([False,True], 100000)},
index= np.random.choice([202006,202006, 202006,202005,202005,202005,202004,202004,202003], 100000)).sort_index(ascending=False)
With this plot:
fig, ax = plt.subplots(figsize=(5, 6))
cmap = sns.mpl_palette("Set2", 2)
sns.heatmap(data=df, cmap=cmap, cbar=False)
plt.xticks(rotation=90, fontsize=10)
plt.yticks(rotation=0, fontsize=10)
legend_handles = [Patch(color=cmap[True], label='Missing Value'), # red
Patch(color=cmap[False], label='Non Missing Value')] # green
plt.legend(handles=legend_handles, ncol=2, bbox_to_anchor=[0.5, 1.02], loc='lower center', fontsize=8, handlelength=.8)
plt.tight_layout()
plt.show()
The overlapping occurs because of the length of the variables names (I cannot change them as they are informative in my real plot). So, I need to decrease the frequency of y-ticks, it could be two ticks per value (when the month changes), or simply? eliminating the overlapping you see in the image above. The y-ticks of this plot needs to show clearly when the next month starts and ends (202006 means June of 2020), because with the real data I have, I can see if a whole piece of data is missing for a whole month (or more months) for any variable.
All possible-adaptable solutions I have found are based when the ticks are from a column: Change tick frequency, adding space between ticks labels, increase spacing between ticks, among others. but I'm still struggling with any adaptation.
Any suggestions?
NOTE: You can't increase/decrease the size of the figure.
Create your DataFrame with a small correction, namely set the number
of elements as a variable (n):
n = 100000
df = pd.DataFrame({'AAAAAAAAAAAAAAAAAAAA': np.random.choice([False,True], n),
'BBBBBBBBBBBBBBBBBBBB': np.random.choice([False,True], n),
'CCCCCCCCCCCCCCCCCCCC': np.random.choice([False,True], n)},
index = np.random.choice([202006,202006, 202006,202005,202005,202005,
202004,202004,202003], n)).sort_index(ascending=False)
Then run your drawing code with another 2 corrections, namely:
set yLabelNo = 10 (the number of y labels),
pass yticklabels=n // yLabelNo to sns.heatmap.
So the code is:
yLabelNo = 10
fig, ax = plt.subplots(figsize=(5, 6))
cmap = sns.mpl_palette("Set2", 2)
sns.heatmap(data=df, cmap=cmap, cbar=False, yticklabels=n // yLabelNo)
plt.xticks(rotation=90, fontsize=10)
plt.yticks(rotation=0, fontsize=10)
legend_handles = [Patch(color=cmap[True], label='Missing Value'), # red
Patch(color=cmap[False], label='Non Missing Value')] # green
plt.legend(handles=legend_handles, ncol=2, bbox_to_anchor=[0.5, 1.02],
loc='lower center', fontsize=8, handlelength=.8)
plt.tight_layout()
plt.show()
And the result is:
If you wish, experiment with other (maybe smaller) values of yLabelNo.
Can someone please help me plot x axis labels in percentages given the following code of my horizontal bar chart?
Finding it difficult to find as I want a more simplistic chart without x axis labels and ticks.
[Horizontal Bar Chart][1]
# Plot the figure size
plt.figure(figsize= (8,6))
# New variable and plot the question of the data frame in a normalized in a horizontal bar chat.
ax1 = df[q1].value_counts(normalize=True).sort_values().plot(kind="barh", color='#fd6a02', width=0.75, zorder=2)
# Draw vague vertical axis lines and set lines to the back of the order
vals = ax1.get_xticks()
for tick in vals:
ax1.axvline(x=tick, linestyle='dashed', alpha=0.4, color = '#d3d3d3', zorder=1)
# Tot3als to produce a composition ratio
total_percent = df[q1].value_counts(normalize=True) *100
# Remove borders
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.spines['left'].set_visible(False)
ax1.spines['bottom'].set_visible(False)
# Set the title of the graph inline with the Y axis labels.
ax1.set_title(q1, weight='bold', size=14, loc = 'left', pad=20, x = -0.16)
# ax.text(x,y,text,color)
for i,val in enumerate(total):
ax1.text(val - 1.5, i, str("{:.2%}".format(total_percent), color="w", fontsize=10, zorder=3)
# Create axis labels
plt.xlabel("Ratio of Responses", labelpad=20, weight='bold', size=12)
Each time I get a EOF error. Can someone help?
It's not based on your code, but I'll customize the answer from the official reference.
The point is achieved with ax.text(), which is a looping process.
import matplotlib.pyplot as plt
import numpy as np
# Fixing random state for reproducibility
np.random.seed(19680801)
plt.rcdefaults()
fig, ax = plt.subplots()
# Example data
people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
y_pos = np.arange(len(people))
performance = 3 + 10 * np.random.rand(len(people))
ax.barh(y_pos, performance, align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis() # labels read top-to-bottom
ax.set_xlabel('Performance')
ax.set_title('How fast do you want to go today?')
# Totals to produce a composition ratio
total = sum(performance)
# ax.text(x,y,text,color)
for i,val in enumerate(performance):
ax.text(val - 1.5, i, str("{:.2%}".format(val/total)), color="w", fontsize=10)
plt.show()
I want to plot a bar chart or a histogram using matplotlib. I don't want a stacked bar plot, but a superimposed barplot of two lists of data, for instance I have the following two lists of data with me:
Some code to begin with :
import matplotlib.pyplot as plt
from numpy.random import normal, uniform
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,1419.34,
1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,1138.70,
1104.12,1012.95,1000.36]
plt.hist(highPower, bins=10, histtype='stepfilled', normed=True,
color='b', label='Max Power in mW')
plt.hist(lowPower, bins=10, histtype='stepfilled', normed=True,
color='r', alpha=0.5, label='Min Power in mW')
I want to plot these two lists against the number of values in the two lists such that I am able to see the variation per reading.
You can produce a superimposed bar chart using plt.bar() with the alpha keyword as shown below.
The alpha controls the transparency of the bar.
N.B. when you have two overlapping bars, one with an alpha < 1, you will get a mixture of colours. As such the bar will appear purple even though the legend shows it as a light red. To alleviate this I have modified the width of one of the bars, this way even if your powers should change you will still be able to see both bars.
plt.xticks can be used to set the location and format of the x-ticks in your graph.
import matplotlib.pyplot as plt
import numpy as np
width = 0.8
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,
1419.34,1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,
1138.70,1104.12,1012.95,1000.36]
indices = np.arange(len(highPower))
plt.bar(indices, highPower, width=width,
color='b', label='Max Power in mW')
plt.bar([i+0.25*width for i in indices], lowPower,
width=0.5*width, color='r', alpha=0.5, label='Min Power in mW')
plt.xticks(indices+width/2.,
['T{}'.format(i) for i in range(len(highPower))] )
plt.legend()
plt.show()
Building on #Ffisegydd's answer, if your data is in a Pandas DataFrame, this should work nicely:
def overlapped_bar(df, show=False, width=0.9, alpha=.5,
title='', xlabel='', ylabel='', **plot_kwargs):
"""Like a stacked bar chart except bars on top of each other with transparency"""
xlabel = xlabel or df.index.name
N = len(df)
M = len(df.columns)
indices = np.arange(N)
colors = ['steelblue', 'firebrick', 'darksage', 'goldenrod', 'gray'] * int(M / 5. + 1)
for i, label, color in zip(range(M), df.columns, colors):
kwargs = plot_kwargs
kwargs.update({'color': color, 'label': label})
plt.bar(indices, df[label], width=width, alpha=alpha if i else 1, **kwargs)
plt.xticks(indices + .5 * width,
['{}'.format(idx) for idx in df.index.values])
plt.legend()
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
if show:
plt.show()
return plt.gcf()
And then in a python command line:
low = [1000.95, 1233.37, 1198.97, 1198.01, 1214.29, 1130.86, 1138.70, 1104.12, 1012.95, 1000.36]
high = [1184.53, 1523.48, 1521.05, 1517.88, 1519.88, 1414.98, 1419.34, 1415.13, 1182.70, 1165.17]
df = pd.DataFrame(np.matrix([high, low]).T, columns=['High', 'Low'],
index=pd.Index(['T%s' %i for i in range(len(high))],
name='Index'))
overlapped_bar(df, show=False)
It is actually simpler than the answers all over the internet make it appear.
a = range(1,10)
b = range(4,13)
ind = np.arange(len(a))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.bar(x=ind, height=a, width=0.35,align='center')
ax.bar(x=ind, height=b, width=0.35/3, align='center')
plt.xticks(ind, a)
plt.tight_layout()
plt.show()