How to plot a bar chart with multiple x-axis data? - python

I'd like to plot a bar chart in Python, similar to Excel. However, I am struggling to have two different x-axes. For example, for each size (like 8M), I want to plot the results of all 5 strategies. For each strategy, there are 3 metrics (Fit, boot, and exp).
You can download the original excel file here here.
This is my code so far:
df = pd.read_excel("data.xlsx",sheet_name="Sheet1")
r1= df['Fit']
r2= df['Boot']
r3= df['Exp']
x= df['strategy']
n_groups = 5
# create plot
fig, ax = plt.subplots()
index = np.arange(n_groups)
names = ["8M","16M","32M","64M","128M"]
bar_width = 0.1
opacity = 0.8
Fit8= [r1[0],r1[1],r1[2],r1[3],r1[4]]
Boot8= [r2[0],r2[1],r2[2],r2[3],r2[4]]
Exp8= [r3[0],r3[1],r3[2],r3[3],r3[4]]
Fit16= [r1[5],r1[6],r1[7],r1[8],r1[9]]
Boot16= [r2[5],r2[6],r2[7],r2[8],r2[9]]
Exp16= [r3[5],r3[6],r3[7],r3[8],r3[9]]
rects1 = plt.bar(
index, Fit8, bar_width,
alpha=opacity,
color='g',
label='Fit'
)
rects2 = plt.bar(
index + 0.1, Boot8, bar_width,
alpha=opacity,
color='b',
label='Boot'
)
rects3 = plt.bar(
index + 0.2, Exp8, bar_width,
alpha=opacity,
color='y',
label='EXP'
)
rects4 = plt.bar(
index + 0.5, Fit16, bar_width,
alpha=opacity,
color='g'
)
rects5 = plt.bar(
index + 0.6, Boot16, bar_width,
alpha=opacity,
color='b'
)
rects6 = plt.bar(
index + 0.7, Exp16, bar_width,
alpha=opacity,
color='y'
)
plt.xticks(index + 0.2, (names))
plt.legend()
plt.tight_layout()
plt.show()

Something like this?
Here the code:
import pandas as pd
import pylab as plt
# read dataframe, take advantage of Multiindex
df = pd.read_excel(
"data.xlsx",
sheet_name="Sheet1", engine='openpyxl',
index_col=[0, 1],
)
# plot the content of the dataframe
ax = df.plot.bar()
# Show minor ticks
ax.minorticks_on()
# Get location of the center of each bar
bar_locations = list(map(lambda x: x.get_x() + x.get_width() / 2., ax.patches))
# Set minor and major tick positions
# Minor are used for S1, ..., S5
# Major for sizes 8M, ..., 128M
# tick locations are sorted according to the 3 metrics, so first all the 25 bars for the fit, then the 25
# for the boot and at the end the 25 for the exp. We set the major tick at the position of the bar at the center
# of the size group, that is the third boot bar of each size.
ax.set_xticks(bar_locations[27:50:5], minor=False) # use the 7th bar of each size group
ax.set_xticks(bar_locations[len(df):2 * len(df)], minor=True) # use the bar in the middle of each group of 3 bars
# Labels for groups of 3 bars and for each group of size
ax.set_xticklabels(df.index.get_level_values(0)[::5], minor=False, rotation=0)
ax.set_xticklabels(df.index.get_level_values(1), minor=True, rotation=0)
# Set tick parameters
ax.tick_params(axis='x', which='major', pad=15, bottom='off')
ax.tick_params(axis='x', which='both', top='off')
# You can use a different color for each group
# You can comment out these lines if you don't like it
size_colors = 'rgbym'
# major ticks
for l, c in zip(ax.get_xticklabels(minor=False), size_colors):
l.set_color(c)
l.set_fontweight('bold')
# minor ticks
for i, l in enumerate(ax.get_xticklabels(minor=True)):
l.set_color(size_colors[i // len(size_colors)])
# remove x axis label
ax.set_xlabel('')
plt.tight_layout()
plt.show()
The main idea here is to use the Multiindex of Pandas, with some minor tweaks.
EDIT
If you want spaces between groups, you can add a dummy category (a.k.a strategy) in the dataframe to create an artificial space, obtaining:
Here the code:
import numpy as np
import pandas as pd
import pylab as plt
# read dataframe, take advantage of Multiindex
df = pd.read_excel(
"data.xlsx",
sheet_name="Sheet1", engine='openpyxl',
index_col=[0, 1],
)
# plot the content of the dataframe
sizes = list(df.index.get_level_values(0).drop_duplicates())
strategies = list(df.index.get_level_values(1).drop_duplicates())
n_sizes = len(sizes)
n_strategies = len(strategies)
n_metrics = len(df.columns)
empty_rows = pd.DataFrame(
data=[[np.nan] * n_metrics] * n_sizes, index=pd.MultiIndex.from_tuples([(s, 'SN') for s in sizes], names=df.index.names),
columns=df.columns,
)
old_columns = list(df.columns)
df = df.merge(empty_rows, how='outer', left_index=True, right_index=True, sort=False).drop(
columns=[f'{c}_y' for c in df.columns]
).sort_index(
ascending=True, level=0, key=lambda x: sorted(x, key=lambda y: int(y[:-1]))
)
df.columns = old_columns
# Update number of strategies
n_strategies += 1
# Plot with Pandas
ax = df.plot.bar()
# Show minor ticks
ax.minorticks_on()
# Get location of the center of each bar
bar_locations = list(map(lambda x: x.get_x() + x.get_width() / 2., ax.patches))
# Set minor and major tick positions
# Major for sizes 8M, ..., 128M
# Minor are used for S1, ..., S5, SN
# Tick locations are sorted according to the 3 metrics, so first 30 (5 sizes * 6 strategies) bars for the fit,
# then 30 (5 sizes * 6 strategies) for the boot and at the end 30 (5 sizes * 6 strategies) for the exp.
# We set the major tick at the position of the bar at the center of the size group (+7),
# that is the third boot bar of each size.
n_bars_per_metric = n_sizes * n_strategies
strategy_ticks = bar_locations[len(df):2 * len(df)]
strategy_ticks = np.concatenate([strategy_ticks[b * n_strategies:b * n_strategies + n_strategies - 1] for b in range(n_sizes)]) # get only positions of the first 5 bars
size_ticks = strategy_ticks[2::n_sizes] + 0.01
ax.set_xticks(size_ticks, minor=False) # use the 7th bar of each size group
ax.set_xticks(strategy_ticks, minor=True) # use the bar in the middle of each group of 3 bars
# Labels for groups of 3 bars and for each group of size
ax.set_xticklabels(sizes, minor=False, rotation=0)
ax.set_xticklabels(strategies * n_sizes, minor=True, rotation=0)
# Set tick parameters
ax.tick_params(axis='x', which='major', pad=15, bottom=False)
ax.tick_params(axis='x', which='both', top=False)
# You can use a different color for each group
# You can comment out these lines if you don't like it
size_colors = 'rgbym'
# major ticks
for l, c in zip(ax.get_xticklabels(minor=False), size_colors):
l.set_color(c)
l.set_fontweight('bold')
# minor ticks
for i, l in enumerate(ax.get_xticklabels(minor=True)):
l.set_color(size_colors[i // len(size_colors)])
# remove x axis label
ax.set_xlabel('')
plt.tight_layout()
plt.show()
As you can see, you have to play with the DataFrame, adding some extra code. Maybe there is a simpler solution, but it was the first that I can think of.

Related

What is wrong with my multiple line graph plotting?

I am attempting to plot multiple line graphs in a graph table itself. However, I run into an error that mentioned:
No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
Not only this happened but my legend tables of the 3 lines don't merge together and my X-axis does not show the months but random numbers from my dataframe. Here is my code and graph result to look through.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
df = pd.read_excel (r'C:\Users\admin\Desktop\Question Folder\Sales of top 30 customers.xlsx')
#Refine and adjust the dataframe for suitable manipulation
df = df.drop('Unnamed: 0', axis = 1)
df = df.iloc[2: , :]
row_detail = df.head(1).values.tolist()
row_detail = row_detail[0]
a = df.iloc[-3:, :].values.tolist()
a = a[0]
df.columns = row_detail
df = df.iloc[1:, :]
print(df) # This is for checking purpose
# This creates a dataframe needed for the practice
df1 = df.iloc[:3]
# This is to plot a line graph from df1
df_chosen = df1
a = 0
# Turning data row of a customer into a list
data_row_1 = df_chosen.iloc[a].values.tolist()
data_row_2 = df_chosen.iloc[a + 1].values.tolist()
data_row_3 = df_chosen.iloc[a + 2].values.tolist()
date = data_row_1[1:]
cus_1 = data_row_1[0]
cus_2 = data_row_2[0]
cus_3 = data_row_3[0]
y1 = data_row_1[1:]
y2 = data_row_2[1:]
y3 = data_row_3[1:]
x = np.arange(len(date)) # the label locations
width = 0.60 # the width of the bars
fig, ax = plt.subplots()
# Increase size of plot in jupyter
plt.rcParams["figure.figsize"] = (20,15)
plt.rcParams.update({'font.size':25})
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_xlabel('Months', fontsize=30)
ax.set_ylabel('Sales', fontsize=30)
ax.set_title('Monthly Sales from ' + cus_1 +", " + cus_2+ " and " + cus_3, fontsize=30)
ax.set_xticks(x, date)
ax.set_ylim(bottom = 0, top = 1000)
legend1 = plt.legend(())
ax.legend(loc='best', fontsize=30)
plt.grid(True)
# set up the 1st line graph
ax.plot(x, y1, "r", label = cus_1, marker='x')
#ax.set_yticks(
ax.grid(True) # turn on grid #1
ax.set_ylim(bottom = 0, top = 1000)
ax.legend(loc='upper left', fontsize=25)
ax2 = ax.twinx()
ax2.plot(x, y2, "b", label= cus_2, marker='x')
ax2.set_yticks([])
ax2.grid(False) # turn off grid #2
ax2.set_ylim(bottom = 0, top = 10000)
ax2.legend(loc='upper left', fontsize=25)
ax3 = ax2.twinx()
ax3.plot(x, y3, "g", label= cus_3, marker='x')
ax3.set_yticks([])
ax3.grid(False) # turn off grid #2
ax3.set_ylim(bottom = 0, top = 10000)
ax3.legend(loc='upper left', fontsize=25)
I just need to understand and know the solutions for the following:
Why is the X-axis not showing the months' names?
Why is the 3 separate legend tables not connected together?
How do I avoid the 'No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.' error warning?
Hope to receive a favorable reply soon. :)
Edit notice: Here is the dataframe used for this problem:

adjusting horizontal bar chart matplotlib to accommodate the bars

I am doing a horizontal bar chart but struggling with adjusting ylim, or maybe another parameter to make my labels clearer and make all the labels fit the y axis . I played around with ylim and the text size can be bigger or smaller but the bars do not fit the y axis. Any idea about the right approach?
My code:
import matplotlib.pyplot as plt #we load the library that contains the plotting capabilities
from operator import itemgetter
D=[]
for att, befor, after in zip(df_portion['attributes'], df_portion['2005_2011 (%)'], df_portion['2012_2015 (%)']):
i=(att, befor, after)
D.append(i)
Dsort = sorted(D, key=itemgetter(1), reverse=False) #sort the list in order of usage
attri = [x[0] for x in Dsort]
aft = [x[1] for x in Dsort]
bef = [x[2] for x in Dsort]
ind = np.arange(len(attri))
width=3
ax = plt.subplot(111)
ax.barh(ind, aft, width,align='center',alpha=1, color='r', label='from 2012 to 2015') #a horizontal bar chart (use .bar instead of .barh for vertical)
ax.barh(ind - width, bef, width, align='center', alpha=1, color='b', label='from 2005 to 2008') #a horizontal bar chart (use .bar instead of .barh for vertical)
ax.set(yticks=ind, yticklabels=attri,ylim=[1, len(attri)/2])
plt.xlabel('Frequency distribution (%)')
plt.title('Frequency distribution (%) of common attributes between 2005_2008 and between 2012_2015')
plt.legend()
plt.show()
This is the plot for above code
To make the labels fit, you need to set a smaller fontsize, or use a larger figsize. Changing the ylim will either just show a subset of the bars (in case ylim is set too narrow), or will show more whitespace (when ylim is larger).
The biggest problem in the code is width being too large. Twice the width needs to fit over a distance of 1.0 (the ticks are placed via ind, which is an array 0,1,2,...). As matplotlib calls the thickness of a horizontal bar plot "height", this name is used in the example code below. Using align='edge' lets you position the bars directly (align='center' will move them half their "height").
Pandas has simple functions to sort dataframes according to one or more rows.
Code to illustrate the ideas:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# first create some test data
df = pd.DataFrame({'attributes': ["alpha", "beta", "gamma", "delta", "epsilon", "zata", "eta", "theta", "iota",
"kappa", "lambda", "mu", "nu", "xi", "omikron", "pi", "rho", "sigma", "tau",
"upsilon", "phi", "chi", "psi", "omega"]})
totals_2005_2011 = np.random.uniform(100, 10000, len(df))
totals_2012_2015 = totals_2005_2011 * np.random.uniform(0.70, 2, len(df))
df['2005_2011 (%)'] = totals_2005_2011 / totals_2005_2011.sum() * 100
df['2012_2015 (%)'] = totals_2012_2015 / totals_2012_2015.sum() * 100
# sort all rows via the '2005_2011 (%)' column, sort from large to small
df = df.sort_values('2005_2011 (%)', ascending=False)
ind = np.arange(len(df))
height = 0.3 # two times height needs to be at most 1
fig, ax = plt.subplots(figsize=(12, 6))
ax.barh(ind, df['2012_2015 (%)'], height, align='edge', alpha=1, color='crimson', label='from 2012 to 2015')
ax.barh(ind - height, df['2005_2011 (%)'], height, align='edge', alpha=1, color='dodgerblue', label='from 2005 to 2011')
ax.set_yticks(ind)
ax.set_yticklabels(df['attributes'], fontsize=10)
ax.grid(axis='x')
ax.set_xlabel('Frequency distribution (%)')
ax.set_title('Frequency distribution (%) of common attributes between 2005_2011 and between 2012_2015')
ax.legend()
ax.margins(y=0.01) # use smaller margins in the y-direction
plt.tight_layout()
plt.show()
The seaborn library has some functions to create barplots with multiple bars per attribute, without the need to manually fiddle with bar positions. Seaborn prefers its data in "long form", which can be created via pandas' melt().
Example code:
import seaborn as sns
df = df.sort_values('2005_2011 (%)', ascending=True)
df_long = df.melt(id_vars='attributes', value_vars=['2005_2011 (%)', '2012_2015 (%)'],
var_name='period', value_name='distribution')
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=df_long, y='attributes', x='distribution', hue='period', palette='turbo', ax=ax)
ax.set_xlabel('Frequency distribution (%)')
ax.set_title('Frequency distribution (%) of common attributes between 2005_2011 and between 2012_2015')
ax.grid(axis='x')
ax.tick_params(axis='y', labelsize=12)
sns.despine()
plt.tight_layout()
plt.show()

subplots, how to set the xlabel and xlim, but removing axis

I'd like to plot EEG data and get this result:
But I am stuck on how to display the x axis label and its xlim.
After reading other questions, which use set_visible(False), I cannot resolve my issue.
I write my code in order to be reproducible:
sfreq = 256
raw_data = np.random.rand(14, 1000 * sfreq)
duration = 10 # duration of the signal
start = 200 * sfreq
final = start + int(sfreq * duration)
channels = list(np.arange(1, len(channels) + 1 ))
fig, ax = plt.subplots(len(channels), 1, sharex=True, figsize=(10, 10))
for idx, node in enumerate(channels):
data = raw_data[idx, start:final]
times = np.arange(1, data.size + 1) / sfreq
ax[idx].plot(times, data, lw=1., ls='-', c='k')
ax[idx].axis('off') # to remove bounding subplot
ax[idx].set_yticks([]) # to remove values from y axis
ax[idx].text(-1, 0, node, fontsize=12) # write text
# plt.axis(True)
# plt.axes().get_xaxis().set_visible(True)
# plt.xlim([200, 220])
plt.xlabel('Time (seconds)', fontsize=12)
plt.tight_layout()
plt.show()
This is my result:
But I'd like this:
Here are some possible changes to the plot:
make the code more python by using zip instead of an index in the for loop
change the visibility of the "spines" (the lines surrounding the subplot) instead of use axis('off')
remove the padding (margins)
use the axes transform to position the text of the y-axis
...
import matplotlib.pyplot as plt
import numpy as np
sfreq = 256
raw_data = np.random.rand(14, 1000 * sfreq)
duration = 10 # duration of the signal
start = 200 * sfreq
final = start + int(sfreq * duration)
channels = np.arange(len(raw_data)) + 1
fig, axs = plt.subplots(len(channels), 1, sharex=True, figsize=(10, 10))
for ax, node, data in zip(axs, channels, raw_data):
data = data[start:final]
times = np.arange(1, data.size + 1) / sfreq
ax.plot(times, data, lw=1., ls='-', c='k')
ax.set_yticks([]) # remove y ticks
for sp in ax.spines:
ax.spines[sp].set_visible(False) # hide the 4 lines surrounding the subplot
ax.text(-0.01, 0.5, node, fontsize=12, ha='right', va='center', transform=ax.transAxes) # write text
ax.margins(x=0) # avoid the empty space left and right
if ax != axs[-1]:
# ax.tick_params(axis='x', length=0) # hide the tick marks
ax.tick_params(bottom=False) # no tick marks at the bottom
axs[-1].set_xlabel('Time (seconds)', fontsize=12, labelpad=-10) # use negative padding to get closer to the xaxis
axs[-1].set_xticks([0, duration])
axs[-1].set_xticklabels([start // sfreq, final // sfreq])
axs[-1].spines['bottom'].set_bounds([0, duration]) # only draw the spine between the two ticks
axs[-1].spines['bottom'].set_visible(True)
axs[-1].spines['bottom'].set_linewidth(2)
plt.tight_layout()
plt.show()

Plotting a Bar Chart on matplotlib

How can I plot a horizontal bar chart with the values at the end of the bar, Something similar to this
I tried this
plt.barh(inc.index,inc)
plt.yticks(inc.index)
plt.xticks(inc);
plt.xlabel("Order Count")
plt.ylabel("Date")
Bar chart
The answer can be found here:
How to display the value of the bar on each bar with pyplot.barh()?
Just add the for loop as cphlewis said:
for i, v in enumerate(inc):
ax.text(v + 3, i + .25, str(v), color='blue', fontweight='bold')
plt.show()
Here is the code that I tried for your situation:
import matplotlib.pyplot as plt
import numpy as np
inc = [12, 25, 50, 65, 40, 45]
index = ["2019-10-31", "2019-10-30", "2019-10-29", "2019-10-28", "2019-10-27", "2019-10-26"]
fig, ax = plt.subplots()
ax.barh(index,inc, color='black')
plt.yticks(index)
plt.xticks(inc);
plt.xlabel("Order Count")
plt.ylabel("Date")
# Set xticks
plt.xticks(np.arange(0, max(inc)+15, step=10))
# Loop for showing inc numbers in the end of bar
for i, v in enumerate(inc):
ax.text(v + 1, i, str(v), color='black', fontweight='bold')
plt.show()
Plot looks like this:
To generate a plot with values superimposed, run:
ax = inc.plot.barh(xticks=inc, xlim=(0, 40));
ax.set_xlabel('Order Count')
ax.set_ylabel('Date')
for p in ax.patches:
w = p.get_width()
ax.annotate(f' {w}', (w + 0.1, p.get_y() + 0.1))
Note that I set xlim with upper limit slightly above the
maximum Order Count, to provide the space for annotations.
For a subset of your data I got:
And one more impovement:
As I see, your data is a Series with a DatetimeIndex.
So if you want to have y label values as dates only (without
00:00:00 for hours), convert the index to string:
inc.index = inc.index.strftime('%Y-%m-%d')
like I did, generating my plot.

Clustered stacked bar plot with error bars

I am using the code (posted here: https://stackoverflow.com/a/22845857/6649485) below to generate a clustered stacked bar plot. Unfortunately the error bars are not shifted in the same way as the data bars. I am not sure how to adress them and set their x-value accordingly.
def plot_clustered_stacked(dfall, labels=None, title="SEC stress study", H="/", **kwargs):
"""Given a list of dataframes, with identical columns and index, create a clustered stacked bar plot. labels is a list of the names of the dataframe, used for the legend title is a string for the title of the plot H is the hatch used for identification of the different dataframe"""
n_df = len(dfall)
n_col = len(dfall[0].columns)
n_ind = len(dfall[0].index)
axe = plt.subplot(111)
for df in dfall : # for each data frame
axe = df.plot(kind="bar",
linewidth=0,
stacked=True,
ax=axe,
legend=False,
grid=False,
yerr=0.1,
**kwargs) # make bar plots
h,l = axe.get_legend_handles_labels() # get the handles we want to modify
for i in range(0, n_df * n_col, n_col): # len(h) = n_col * n_df
for j, pa in enumerate(h[i:i+n_col]):
for rect in pa.patches: # for each index
rect.set_x(rect.get_x() + 1 / float(n_df + 1) * i / float(n_col))
rect.set_hatch(H * int(i / n_col)) #edited part
rect.set_width(1 / float(n_df + 1))
axe.set_xticks((np.arange(0, 2 * n_ind, 2) + 1 / float(n_df + 1)) / 2.)
axe.set_xticklabels(df.index, rotation = 0)
axe.set_title(title)
# Add invisible data to add another legend
n=[]
for i in range(n_df):
n.append(axe.bar(0, 0, color="gray", hatch=H * i))
l1 = axe.legend(h[:n_col], l[:n_col], loc=[1.01, 0.5])
if labels is not None:
l2 = plt.legend(n, labels, loc=[1.01, 0.1])
axe.add_artist(l1)
return axe
This is what I came up with by now.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
def plot_clustered_stacked(dfall):
for j, df in enumerate(dfall):
set_count=1
width = 0.2
N=len(df.index)
b_width = 0.2
index=np.arange(N/set_count)
labels=df.index
p1 = plt.bar(index +j*b_width, df['Average Monomer Area'], width=width, yerr='0.5 Stdev Monomer Area', data=df)
p2 = plt.bar(index +j*b_width, df['Average HMW Area'], bottom=df['Average Monomer Area'], width=width, yerr='0.5 Stdev HMW Area', data=df)
plt.xticks(index+b_width, labels)
plt.legend((p1[0], p2[0]), ('Average Monomer Area', 'Average
HMW Area'))
data=pd.read_csv("SEC.csv")
df1 = data[data['Time'].str.contains("0 weeks")].drop(['High/low', 'Time'], axis=1)
df1.set_index('Excipient Name', inplace=True)
df2 = data[data['Time'].str.contains("1 week")].drop(['High/low', 'Time'], axis=1)
df2.set_index('Excipient Name', inplace=True)
df3 = data[data['Time'].str.contains("3 weeks")].drop(['High/low', 'Time'], axis=1)
df3.set_index('Excipient Name', inplace=True)
plot_clustered_stacked([df1, df2, df3])
plt.show()

Categories