I am trying to draw a bar chart that looks like the one below. I am not sure how to show a percentage value on top of each column and how to place a legend on the right side. My code snippet is below; it works, but the percentage values and the legend are missing.
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
# Age-group labels shown on the x axis, one per bar.
objects = ('18-25', '26-30', '31-40', '40-50')
# Integer x positions 0..len(objects)-1 for the bars.
y_pos = np.arange(len(objects))
# Bar heights (plotted against the '%User' y axis).
performance = [13, 18, 16, 3]
width = 0.35 # the width of the bars; NOTE: never passed to plt.bar below, so it has no effect
# A single bar() call draws all four bars; alpha=0.5 makes every bar half transparent.
plt.bar(y_pos, performance, align='center', alpha=0.5, color=('red', 'green', 'blue', 'yellow'))
# Replace the numeric tick labels with the age-group names.
plt.xticks(y_pos, objects)
plt.ylabel('%User', fontsize=16)
plt.title('Age of Respondents', fontsize=20)
# Duplicate of the assignment above; still unused.
width = 0.35
plt.show()
The legend colors were slightly different than the plot colors because alpha=0.5, which has been removed.
imagecolorpicker.com was used to select the correct blue and green.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
# One colour per age group, in bar order. The hex codes are the green and blue
# that were picked to match the target image.
color = ('red', '#00b050', '#00b0f0', 'yellow')
objects = ('18-25', '26-30', '31-40', '40-50')
y_pos = np.arange(len(objects))
performance = [13, 18, 16, 3]

# No alpha here: translucent bars would not match the opaque legend patches.
plt.bar(y_pos, performance, align='center', color=color)
plt.xticks(y_pos, objects)
plt.ylim(0, 20)  # this adds a little space at the top of the plot, to compensate for the annotation
plt.ylabel('%User', fontsize=16)
plt.title('Age of Respondents', fontsize=20)

# Map age-group names to colors. Keying on the names (rather than on the
# percentages) guarantees one legend entry per bar even if two groups happen
# to share the same percentage.
cmap = dict(zip(objects, color))

# Create the rectangles for the legend, already labelled with the group name.
patches = [Patch(color=v, label=k) for k, v in cmap.items()]

# Add the legend outside the axes, vertically centred on the right-hand side.
plt.legend(title='Number of Trips', labels=objects, handles=patches, bbox_to_anchor=(1.04, 0.5),
           loc='center left', borderaxespad=0, fontsize=15, frameon=False)

# Add the annotations. The text is anchored at the top of each bar; the
# trailing newline makes it a two-line label centred on that point, which
# lifts the visible first line just above the bar.
for y, x in zip(performance, y_pos):
    plt.annotate(f'{y}%\n', xy=(x, y), ha='center', va='center')
Annotation Resources - from matplotlib v3.4.2
Adding value labels on a matplotlib bar chart
How to annotate each segment of a stacked bar chart
Stacked Bar Chart with Centered Labels
How to plot and annotate multiple data columns in a seaborn barplot
How to annotate a seaborn barplot with the aggregated value
stack bar plot in matplotlib and add label to each section
How to add multiple annotations to a barplot
How to plot and annotate a grouped bar chart
How to plot a horizontal stacked bar with annotations
Related
I want to make boxplots with hues but I want to color code it so that each specific X string is a certain color with the hue just being a lighter color. I am able to do a boxplot without a hue. When I incorporate the hue, I get the second boxplot which loses the colors. Can someone help me customize the colors for the figure that contains the hue?
Essentially, it's what the answer to this question does, but with boxplots.
This is my code:
first boxplot
# First attempt: no hue, one fixed colour per construct.
order=['Ash1','E1A','FUS','p53']
colors=['gold','teal','darkorange','royalblue']
# Construct name -> colour, used as the palette for both plots below.
color_dict=dict(zip(order,colors))
fig,ax=plt.subplots(figsize=(25,15))
# Boxes coloured per construct through palette=color_dict.
bp=sns.boxplot(data=df_idrs, x=df_idrs["construct"], y=df_idrs['Norm_Ef_IDR/Ef_GS'],ax=ax,palette=color_dict)
# Individual observations drawn on the same axes with the same palette.
sns.stripplot(ax=ax,y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs,palette=color_dict,
jitter=1, marker='o', alpha=0.4,edgecolor='black',linewidth=1, dodge=True)
# Dashed horizontal reference line at y = 1.
ax.axhline(y=1,linestyle="--",color='black',linewidth=2)
# Legend anchored just outside the top-right corner of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
second boxplot
# Second attempt: same data, but split by "location" through hue.
order=['Ash1','E1A','FUS','p53']
colors=['gold','teal','darkorange','royalblue']
# Built as in the first attempt, but no longer passed to either plot call below.
color_dict=dict(zip(order,colors))
fig,ax=plt.subplots(figsize=(25,15))
# hue= replaces palette= here, so the per-construct colours are not applied.
bp=sns.boxplot(data=df_idrs, x=df_idrs["construct"], y=df_idrs['Norm_Ef_IDR/Ef_GS'],ax=ax, hue=df_idrs["location"])
# No ax= is given to stripplot in this version.
sns.stripplot(y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs, hue=df_idrs["location"],
jitter=1, marker='o', alpha=0.4,edgecolor='black',linewidth=1, dodge=True)
# Dashed horizontal reference line at y = 1.
ax.axhline(y=1,linestyle="--",color='black',linewidth=2)
# Legend anchored just outside the top-right corner of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
The only thing that changed was the palette to hue. I have seen many examples on here but I am unable to get them to work. Using the second code, I have tried the following:
Nothing happens for this one.
# Attempt 1: recolour every PolyCollection found on the axes, two per construct
# (the second one of each pair in a whitened shade).
# NOTE(review): presumably a boxplot adds no PolyCollection artists (its boxes
# are patches), so this loop has nothing to iterate over -- which would explain
# why "nothing happens". Confirm with len(ax.findobj(PolyCollection)).
for ind, bp in enumerate(ax.findobj(PolyCollection)):
    rgb = to_rgb(colors[ind // 2])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    bp.set_facecolor(rgb)
I get index out of range for the following one.
# Attempt 2: recolour the first four entries of bp.artists, one per construct.
# NOTE(review): the IndexError reported for this snippet suggests bp.artists
# holds fewer than four items in the installed seaborn/matplotlib -- presumably
# the boxes are stored elsewhere (e.g. ax.patches). Confirm before relying on it.
for i in range(0,4):
    mybox = bp.artists[i]
    mybox.set_facecolor(color_dict[order[i]])
Matplotlib stores the boxes in ax.patches, but there are also 2 dummy patches (used to construct the legend) that need to be filtered away. The dots of the stripplot are stored in ax.collections. There are also 2 dummy collections for the legend, but as those come at the end, they don't form a problem.
Some remarks:
sns.boxplot returns the subplot on which it was drawn; as it is called with ax=ax it will return that same ax
Setting jitter=1 in the stripplot will smear the dots over a width of 1. Since 1 is the distance between the x positions and the boxes are only 0.4 wide, the dots would spill over onto the neighboring boxes. To avoid clutter, the code below uses jitter=0.4.
Here is some example code starting from dummy test data:
from matplotlib import pyplot as plt
from matplotlib.legend_handler import HandlerTuple
from matplotlib.patches import PathPatch
from matplotlib.colors import to_rgb
import seaborn as sns
import pandas as pd
import numpy as np
# Reproducible dummy data: 4 constructs x 2 locations, 100 samples per combination.
np.random.seed(20230215)
order = ['Ash1', 'E1A', 'FUS', 'p53']
colors = ['gold', 'teal', 'darkorange', 'royalblue']
hue_order = ['A', 'B']
df_idrs = pd.DataFrame({'construct': np.repeat(order, 200),
                        'Norm_Ef_IDR/Ef_GS': (np.random.normal(0.03, 1, 800).cumsum() + 10) / 15,
                        'location': np.tile(np.repeat(hue_order, 100), 4)})

fig, ax = plt.subplots(figsize=(12, 5))
# Fixing order and hue_order makes the order in which boxes are created
# predictable, so they can be paired with box_colors below.
sns.boxplot(data=df_idrs, x=df_idrs['construct'], y=df_idrs['Norm_Ef_IDR/Ef_GS'], hue='location',
            order=order, hue_order=hue_order, ax=ax)

# One colour per (construct, hue) pair: the base colour for the first hue level,
# progressively blended towards white (up to 50%) for the following levels.
box_colors = [f + (1 - f) * np.array(to_rgb(c))  # whiten colors depending on hue
              for c in colors for f in np.linspace(0, 0.5, len(hue_order))]

# Keep only the PathPatch entries of ax.patches: this filters away the dummy
# patches that exist only to build the legend. This must happen before the
# stripplot is added.
box_patches = [p for p in ax.patches if isinstance(p, PathPatch)]
for patch, color in zip(box_patches, box_colors):
    patch.set_facecolor(color)

sns.stripplot(y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs, hue=df_idrs['location'],
              jitter=0.4, marker='o', alpha=0.4, edgecolor='black', linewidth=1, dodge=True, ax=ax)
# The stripplot dots live in ax.collections. zip() stops after len(box_colors)
# items, so the extra legend-only collections at the end are left untouched.
for collection, color in zip(ax.collections, box_colors):
    collection.set_facecolor(color)

ax.axhline(y=1, linestyle='--', color='black', linewidth=2)

# One legend entry per hue level; each entry is a tuple of that level's boxes
# from every construct, which HandlerTuple draws side by side.
handles = [tuple(box_patches[i::len(hue_order)]) for i in range(len(hue_order))]
ax.legend(handles=handles, labels=hue_order, title='hue category',
          handlelength=4, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
          loc='upper left', bbox_to_anchor=(1.01, 1))
plt.tight_layout()
plt.show()
How do I use a single legend for multiple geopandas plots?
Right now I have a Figure like this:
This post explains how to set the legend values to be the same for each plot. However, I would like to have a single legend for all plots. Ideally, it should also be possible to have multiple legends for the different DataFrames that I want to plot; e.g., the lines you see in the pictures also have a description.
Here is my current code:
years = [2005, 2009, 2013]
# initialize figure: one row with one subplot per year
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(10, 10), dpi=300, constrained_layout=True)
for i, year in enumerate(years):
    # subset lines
    lines_plot = lines[lines['year'] == year]
    # subset controls plot
    controls_plot = controls[controls['year'] == year]
    # draw subfig; legend=True is requested on every subplot, which is why
    # each one gets its own horizontal legend
    controls_plot.plot(column='pop_dens', ax=ax[i], legend=True, legend_kwds={'orientation': "horizontal"})
    # red lines drawn on top of the polygons (zorder=2)
    lines_plot.plot(ax=ax[i], color='red', lw=2, zorder=2)
Regarding the first of your questions, 'How do I use a single legend for multiple geopandas plots?': you can make sure all your plots use the same color scale (using the vmin and vmax arguments of the .plot() function) and then add a single colorbar to the figure, as shown below. For the red lines, you can simply add another legend (the first item is technically a colorbar, not a legend).
import geopandas as gpd
from matplotlib import pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
from matplotlib.lines import Line2D
# Example data set bundled with GeoPandas.
# NOTE(review): gpd.datasets is reported as deprecated in recent GeoPandas
# releases -- confirm it is still available in the installed version.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
f, ax = plt.subplots(nrows=1, ncols=3, figsize=(9, 4))
# define min and max values and colormap for the plots
value_min = 0
value_max = 1e7
cmap = 'viridis'
# Every subplot gets the same vmin/vmax/cmap, so a given colour means the same
# value in all three maps and one shared colorbar can describe them all.
world.plot(ax=ax[0], column='pop_est', vmin=value_min, vmax=value_max, cmap=cmap)
world.plot(ax=ax[1], column='pop_est', vmin=value_min, vmax=value_max, cmap=cmap)
world.plot(ax=ax[2], column='pop_est', vmin=value_min, vmax=value_max, cmap=cmap)
# define a mappable based on which the colorbar will be drawn
# (built from the same normalisation and colormap as the plots above)
mappable = cm.ScalarMappable(
norm=mcolors.Normalize(value_min, value_max),
cmap=cmap
)
# define position and extent of colorbar
# ([left, bottom, width, height] passed to Figure.add_axes)
cb_ax = f.add_axes([0.1, 0.1, 0.8, 0.05])
# draw colorbar
cbar = f.colorbar(mappable, cax=cb_ax, orientation='horizontal')
# add handles for the legend
# (stand-alone line artists; they are not drawn on the maps themselves)
custom_lines = [
Line2D([0], [0], color='r'),
Line2D([0], [0], color='b'),
]
# define labels for the legend
custom_labels = ['red line', 'blue line']
# plot legend, loc defines the location
# NOTE(review): plt.legend draws on the current axes, presumably the colorbar
# axes created above, so loc=(.4, 1.5) would be in that axes' coordinates -- confirm.
plt.legend(
handles=custom_lines,
labels=custom_labels,
loc=(.4, 1.5),
title='2nd legend',
ncol=2
)
# NOTE(review): tight_layout may warn about or ignore the manually placed
# colorbar axes -- confirm the resulting layout.
plt.tight_layout()
plt.show()
I have four sets of data with eight (ordered) data points each. I would like to do a scatter plot where you can see the data points distinguished by the different data sets (symbol) and data points (color) with two legends. I prepared a minimal example below that shows what my scatter plot should look like.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as clrs
# t contains the labels for the data points (one value, and thus one colour, per point)
t = [0.0, -0.5, -1.0, -1.5, -2.0, -2.5, -3.0, -3.5]

# create the color map: eight discrete colours sampled from 'magma'
magma = plt.cm.get_cmap('magma', 8)
newcolors = magma(np.linspace(0, 1, 8))
cm = clrs.ListedColormap(newcolors)

fig, ax = plt.subplots()

# create four scatter plots for the four different data sets. Each scatter plot has a different marker and label.
scatter1 = ax.scatter(np.random.rand(8), np.random.rand(8), c=t, marker='o', label="label1", cmap=cm)
scatter2 = ax.scatter(np.random.rand(8), np.random.rand(8), c=t, marker='^', label="label2", cmap=cm)
scatter3 = ax.scatter(np.random.rand(8), np.random.rand(8), c=t, marker='d', label="label3", cmap=cm)
scatter4 = ax.scatter(np.random.rand(8), np.random.rand(8), c=t, marker='X', label="label4", cmap=cm)

# produce a legend with the unique colors from the scatter
legend1 = ax.legend(*scatter1.legend_elements(), loc="upper left", title="Legend 1")
# NOTE(review): newer Matplotlib releases appear to expose these handles as
# `legend_handles`; confirm which attribute the installed version provides.
for i in range(8):
    legend1.legendHandles[i].set_color(newcolors[i])
# Re-add legend1 explicitly: the next ax.legend() call would otherwise replace it.
ax.add_artist(legend1)

# create a second legend that shows the markers and labels
legend2 = ax.legend(loc="lower right", title="Legend 2")
# Intended to paint every marker in this legend magenta (per the question,
# only the marker edges actually change).
legend2.legendHandles[0].set_color('m')
legend2.legendHandles[1].set_color('m')
legend2.legendHandles[2].set_color('m')
legend2.legendHandles[3].set_color('m')
ax.add_artist(legend2)
plt.show()
The only thing I'm not happy about is the color in Legend 2. I don't know why, but the set_color option only changes the color of the edges. The fill color is the first color in my colormap...
Thanks for any help!
Follow up question: How can I change the symbols in legend 1, e.g., squares instead of circles?
I am trying to set the legend of a bar plot using the values of a pandas dataframe. I searched and could not find a solution, I have used another snippet from SO to annotate the bars. The plot generated shows the bars from the series in different colors as I want and even with the values of the bars. In Excel, e.g., you can have a legend that shows the series values as legend. I am trying to get that functionality here.
Here's a MWE:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
import seaborn, itertools
seaborn.set()
def flip(items, ncol):
    """Return an iterator over *items* reordered by stride *ncol*.

    Every ``ncol``-th element starting at offset 0 comes first, then every
    ``ncol``-th element starting at offset 1, and so on. Used below to
    reorder handles and labels before passing them to a multi-column legend.

    Args:
        items: A sliceable sequence (e.g. a list of legend handles or labels).
        ncol: Stride used for the regrouping.

    Returns:
        An ``itertools.chain`` iterator yielding the reordered elements.
    """
    # The slices are taken eagerly (list comprehension), so later changes to
    # *items* do not affect the returned iterator.
    return itertools.chain.from_iterable([items[i::ncol] for i in range(ncol)])
def annotateBars(row, ax=None):
    """Write the value of column ``'A'`` as rotated text on the bar of *row*.

    Short bars (value below 0.2) get a black label placed just above the bar;
    taller bars get a white label placed just inside the top of the bar.

    Args:
        row: A DataFrame row (as passed by ``DataFrame.apply(..., axis=1)``);
            ``row['A']`` is the bar height and ``row.name`` its x position.
        ax: Axes to draw on. Defaults to the current axes. The default is
            resolved at call time: the previous ``ax=ax`` default was
            evaluated when the function was defined, before any ``ax``
            existed, and raised ``NameError``.
    """
    if ax is None:
        import matplotlib.pyplot as plt
        ax = plt.gca()

    if row['A'] < 0.2:
        # Bar too short to hold the text: put a dark label above it.
        color = 'black'
        vertalign = 'bottom'
        vertpad = 0.02
    else:
        # Enough room: put a light label inside the bar, below its top edge.
        color = 'white'
        vertalign = 'top'
        vertpad = -0.02

    ax.text(row.name, row['A'] + vertpad, "{:.4f}%".format(row['A']),
            zorder=10, rotation=90, color=color,
            horizontalalignment='center',
            verticalalignment=vertalign,
            fontsize=14, weight='heavy')
labels1=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
width = 0.75
# One single-letter colour code per bar.
my_colors = 'gbkymc'
# Five random values in one row; transposed below into a single column 'A'.
arr1 = np.random.random((1, 5))
arr1_ind = np.arange((arr1.shape[1]))
df_arr1 = pd.DataFrame(zip(*arr1), index = arr1_ind, columns = ['A'])
# A single plot call: all bars belong to one series ('A').
ax = df_arr1.plot(kind='bar', width = 0.85, alpha = 0.5, color = my_colors)
# plt.xticks(arr1_ind+width/4, arr1_ind)
ax.set_xticks(arr1_ind)
ax.set_xticklabels([labels1[i] for i in arr1_ind])
hndls, lbls = ax.get_legend_handles_labels()
# Attempt to build a two-column legend from the day names.
plt.legend(flip(hndls, 2), flip(labels1, 2), loc='best', ncol=2)
# Write each bar's value onto the bar; the return value is not needed.
junk = df_arr1.apply(annotateBars, ax=ax, axis=1)
# tick_params expects booleans here. The string 'off' is a non-empty (truthy)
# value, so it would leave the ticks and labels switched on.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tight_layout()
plt.show()
It sounds like you're wanting the legend to have one item per color.
Right now, you're only creating a single artist (a single call to bar), so the legend will only have one entry.
As a quick example of doing something similar to what you want:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# One row per bar: its height, its legend label and its colour.
df = pd.DataFrame({
    'value':np.random.random(5),
    'label':['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
    'color':['g', 'b', 'k', 'y', 'm']})
fig, ax = plt.subplots()
# Plot each bar separately and give it a label.
# Every bar() call creates its own artist, so each bar gets its own legend entry.
for index, row in df.iterrows():
    ax.bar([index], [row['value']], color=row['color'], label=row['label'],
           alpha=0.5, align='center')
ax.legend(loc='best', frameon=False)
# More reasonable limits for a vertical bar plot...
ax.margins(0.05)
ax.set_ylim(bottom=0)
# Styling similar to your example...
ax.patch.set_facecolor('0.9')
ax.grid(color='white', linestyle='-')
# Grid drawn behind the bars; x tick labels removed because the legend names the bars.
ax.set(axisbelow=True, xticklabels=[])
plt.show()
I want to plot a bar chart or a histogram using matplotlib. I don't want a stacked bar plot, but a superimposed barplot of two lists of data, for instance I have the following two lists of data with me:
Some code to begin with :
import matplotlib.pyplot as plt
from numpy.random import normal, uniform
# Ten readings each of maximum and minimum power (mW, per the legend labels).
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,1419.34,
             1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,1138.70,
            1104.12,1012.95,1000.36]
# density=True normalises each histogram; it replaces the `normed` keyword,
# which current Matplotlib no longer accepts.
plt.hist(highPower, bins=10, histtype='stepfilled', density=True,
         color='b', label='Max Power in mW')
# Drawn second and half transparent so the first histogram stays visible.
plt.hist(lowPower, bins=10, histtype='stepfilled', density=True,
         color='r', alpha=0.5, label='Min Power in mW')
I want to plot these two lists against the number of values in the two lists such that I am able to see the variation per reading.
You can produce a superimposed bar chart using plt.bar() with the alpha keyword as shown below.
The alpha controls the transparency of the bar.
N.B. when you have two overlapping bars, one with an alpha < 1, you will get a mixture of colours. As such the bar will appear purple even though the legend shows it as a light red. To alleviate this I have modified the width of one of the bars, this way even if your powers should change you will still be able to see both bars.
plt.xticks can be used to set the location and format of the x-ticks in your graph.
import matplotlib.pyplot as plt
import numpy as np
width = 0.8
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,
             1419.34,1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,
            1138.70,1104.12,1012.95,1000.36]
indices = np.arange(len(highPower))

# align='edge' anchors the left side of every bar at its x value. The offsets
# below are written for that convention; with the default align='center' the
# narrow bars and the tick labels would end up shifted to the right of the
# wide bars.
plt.bar(indices, highPower, width=width, align='edge',
        color='b', label='Max Power in mW')
# Half-width bar, shifted by a quarter of the full width so it sits centred
# inside the wide bar and both stay visible.
plt.bar(indices + 0.25 * width, lowPower, align='edge',
        width=0.5 * width, color='r', alpha=0.5, label='Min Power in mW')
# Tick labels T0..T9 placed at the horizontal centre of each wide bar.
plt.xticks(indices + width / 2.,
           ['T{}'.format(i) for i in range(len(highPower))])
plt.legend()
plt.show()
Building on @Ffisegydd's answer, if your data is in a Pandas DataFrame, this should work nicely:
def overlapped_bar(df, show=False, width=0.9, alpha=.5,
                   title='', xlabel='', ylabel='', **plot_kwargs):
    """Like a stacked bar chart except bars on top of each other with transparency

    Each column of *df* is drawn as one bar series at the same x positions.
    The first column is drawn opaque; every following column is drawn with
    transparency *alpha* so the series underneath stay visible.

    Args:
        df: DataFrame with one row per x position and one column per series.
        show: If true, call ``plt.show()`` before returning.
        width: Bar width passed to ``plt.bar``.
        alpha: Transparency for every series after the first.
        title: Plot title.
        xlabel: X-axis label; falls back to ``df.index.name`` when empty.
        ylabel: Y-axis label.
        **plot_kwargs: Extra keyword arguments forwarded to ``plt.bar``.
            ``color`` and ``label`` are always set per column.

    Returns:
        The current matplotlib figure.
    """
    xlabel = xlabel or df.index.name
    N = len(df)
    M = len(df.columns)
    indices = np.arange(N)

    # Base palette, repeated often enough to cover all M columns.
    # '#598556' stands in for 'darksage', which current Matplotlib does not
    # recognise as a colour name (approximately the same muted green).
    palette = ['steelblue', 'firebrick', '#598556', 'goldenrod', 'gray']
    colors = palette * (M // len(palette) + 1)

    # Work on a copy so the per-column color/label never leak between
    # iterations or back into the caller's keyword dictionary.
    base_kwargs = dict(plot_kwargs)
    align = base_kwargs.setdefault('align', 'center')

    for i, (label, color) in enumerate(zip(df.columns, colors)):
        kwargs = dict(base_kwargs, color=color, label=label)
        plt.bar(indices, df[label], width=width, alpha=alpha if i else 1, **kwargs)

    # Tick labels belong under the middle of each bar: that is the bar's x
    # position for centre-aligned bars and half a width further for
    # edge-aligned ones. Computed once, since it does not depend on the column.
    tick_positions = indices if align == 'center' else indices + .5 * width
    plt.xticks(tick_positions,
               ['{}'.format(idx) for idx in df.index.values])
    plt.legend()
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    if show:
        plt.show()
    return plt.gcf()
And then in a python command line:
low = [1000.95, 1233.37, 1198.97, 1198.01, 1214.29, 1130.86, 1138.70, 1104.12, 1012.95, 1000.36]
high = [1184.53, 1523.48, 1521.05, 1517.88, 1519.88, 1414.98, 1419.34, 1415.13, 1182.70, 1165.17]
# Two columns ('High', 'Low'), one row per reading, indexed T0..T9.
# np.column_stack builds the same (10, 2) layout as transposing an np.matrix,
# without relying on the discouraged np.matrix class.
df = pd.DataFrame(np.column_stack([high, low]), columns=['High', 'Low'],
                  index=pd.Index(['T%s' %i for i in range(len(high))],
                                 name='Index'))
# 'High' is listed first so it is drawn opaque, with 'Low' overlaid on top.
overlapped_bar(df, show=False)
It is actually simpler than the answers all over the internet make it appear.
# Two series of nine values each, drawn at the same x positions.
a = range(1, 10)
b = range(4, 13)
ind = np.arange(len(a))

# A single figure with a single subplot.
fig, ax = plt.subplots()

# Wide bars for `a` first, then bars a third as wide for `b` on top of them;
# both are centred on the same x positions, so the narrow bars sit inside
# the wide ones.
for heights, bar_width in ((a, 0.35), (b, 0.35 / 3)):
    ax.bar(x=ind, height=heights, width=bar_width, align='center')

# Label the x positions with the values of `a`.
plt.xticks(ind, a)
plt.tight_layout()
plt.show()