I'm trying to make this swarmplot with seaborn
My problem is that the swarms are too wide. I want to be able to break them up into rows of maximum 3 dots per row
This is my code:
# Import modules
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
###
# Import and clead dataset
url = "https://raw.githubusercontent.com/amirnakar/scratchboard/master/Goodreads/goodreads_library_export.csv"
Books = pd.read_csv(url)
Books = Books[Books['Date Read'].notna()] # Remove NA
Books['Year'] = pd.to_datetime( # Convert to dates
Books['Date Read'],
format='%YYYY%mm%dd',
errors='coerce')
Books['Year'] = pd.DatetimeIndex(Books['Date Read']).year # Take only years
Books[['Year', 'Date Read']] # merge the years in
###
# Calculate mean rate by year
RateMeans = (Books["My Rating"].groupby(Books["Year"]).mean())
Years = list(RateMeans.index.values)
Rates = list(RateMeans)
RateMeans = pd.DataFrame(
{'Years': Years,
'Rates': Rates
})
###
# Plot
fig,ax = plt.subplots(figsize=(20,10))
## Violin Plot:
plot = sns.violinplot(
data=Books,
x = "Year",
y = 'My Rating',
ax=ax,
color = "white",
inner=None,
#palette=colors_from_values(ArrayRates[:,1], "Blues")
)
## Swarm Plot
plot = sns.swarmplot(
data=Books,
x = "Year",
y = 'My Rating',
ax=ax,
hue = "My Rating",
size = 10
)
## Style
### Title
ax.text(x=0.5, y=1.1, s='Book Ratings: Distribution per Year', fontsize=32, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.05, s='Source: Goodreads.com (amirnakar)', fontsize=24, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
### Axis
ax.set(xlim=(4.5, None), ylim=(0,6))
#ax.set_title('Book Ratings: Distribution per Year \n', fontsize = 32)
ax.set_ylabel('Rating (out of 5 stars)', fontsize = 24)
ax.set_xlabel('Year', fontsize = 24)
ax.set_yticklabels(ax.get_yticks().astype(int), size=20)
ax.set_xticklabels(ax.get_xticks(), size=20)
### Legend
plot.legend(loc="lower center", ncol = 5 )
### Colour pallete
colorset = ["#FAFF04", "#FFD500", "#9BFF00", "#0099FF", "#000BFF"]
colorset.reverse()
sns.set_palette(sns.color_palette(colorset))
# Save the plot
#plt.show(plot)
plt.savefig("Rate-Python.svg", format="svg")
This is the output:
What I want to have happen:
I want to be able to define that each row of dots should have a maximum of 3, if it's more, than break it into a new row. I demonstrate it here (done manually in PowerPoint) on two groups, but I want it for the entire plot
BEFORE:
AFTER:
Here is an attempt to relocate the dots a bit upward/downward. The value for delta comes from experimenting.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Import and clea dataset
url = "https://raw.githubusercontent.com/amirnakar/scratchboard/master/Goodreads/goodreads_library_export.csv"
Books = pd.read_csv(url)
Books = Books[Books['Date Read'].notna()] # Remove NA
Books['Year'] = pd.DatetimeIndex(Books['Date Read']).year # Take only years
# Calculate mean rate by year
RatePerYear = Books[["My Rating", "Year"]].groupby("Year")["My Rating"].value_counts()
modified_ratings = []
delta = 0.2 # distance to move overlapping ratings
for (year, rating), count in RatePerYear.iteritems():
higher = max(0, ((count - 3) + 1) // 2)
lower = max(0, (count - 3) // 2)
modified_ratings.append([year, rating, count - higher - lower])
for k in range((higher + 2) // 3):
modified_ratings.append([year, rating + (k + 1) * delta, 3 if (k + 1) * 3 <= higher else higher % 3])
for k in range((lower + 2) // 3):
modified_ratings.append([year, rating - (k + 1) * delta, 3 if (k + 1) * 3 <= lower else lower % 3])
modified_ratings = np.array(modified_ratings)
modified_ratings_df = pd.DataFrame(
{'Year': np.repeat(modified_ratings[:, 0].astype(int), modified_ratings[:, 2].astype(int)),
'My Rating': np.repeat(modified_ratings[:, 1], modified_ratings[:, 2].astype(int))})
modified_ratings_df['Rating'] = modified_ratings_df['My Rating'].round().astype(int)
fig, ax = plt.subplots(figsize=(20, 10))
sns.violinplot(data=Books, x="Year", y='My Rating', ax=ax, color="white", inner=None)
palette = ["#FAFF04", "#FFD500", "#9BFF00", "#0099FF", "#000BFF"].reverse()
sns.swarmplot(data=modified_ratings_df, x="Year", y='My Rating', ax=ax, hue="Rating", size=10, palette=palette)
ax.set(xlim=(4.5, None), ylim=(0, 6))
ax.legend(loc="lower center", ncol=5)
plt.tight_layout()
plt.show()
Related
I try to produce a plot and want to automatically add text (in this case is percentage) to each circle in correspond to each y axis types. Any help would be very helpful.
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Make some data
index=['Stream flow',
'Soil moisture',
'Water indices',
'Others',
'Temperature',
'Precipitation',
'Vegetative indices']
value=[2.13, 6.38, 10.64, 12.77, 17.73, 21.99, 28.37]
# create dataframe
percentages = pd.Series(value,index=index)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')
# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))
fig, ax = plt.subplots(figsize=(15,8))
# create for each expense type an horizontal line that starts at x = 0 with the length
plt.hlines(y=my_range, xmin=0, xmax=df['percentage']-0.5, color='black', alpha=0.8, linewidth=1)
# create for each expense type a dot at the level of the expense percentage value
line=plt.plot(df['percentage'], my_range, "o", markersize=30, color='#fd8c00', alpha=0.6, linewidth=0.3)
# set labels
ax.set_xlabel('Percentage', fontsize=15)
ax.set_ylabel('')
# set axis
ax.tick_params(axis='both', which='major', labelsize=14)
plt.yticks(my_range, df.index)
ax.set_xlim(0,30)
You can use matplotlib.axes.Axes.text:
x_space = 0.4
y_space = 0.05
fontsize = 7
for y_i, val in enumerate(value, 1):
ax.text(x = val - x_space, y = y_i - y_space, s = f'{val}%', fontsize = fontsize)
You have to adjust x_space, y_space and fontsize in order to fit properly the text within the circles.
Complete code
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Make some data
index=['Stream flow',
'Soil moisture',
'Water indices',
'Others',
'Temperature',
'Precipitation',
'Vegetative indices']
value=[2.13, 6.38, 10.64, 12.77, 17.73, 21.99, 28.37]
# create dataframe
percentages = pd.Series(value,index=index)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')
# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))
fig, ax = plt.subplots(figsize=(15,8))
# create for each expense type an horizontal line that starts at x = 0 with the length
plt.hlines(y=my_range, xmin=0, xmax=df['percentage']-0.5, color='black', alpha=0.8, linewidth=1)
# create for each expense type a dot at the level of the expense percentage value
line=plt.plot(df['percentage'], my_range, "o", markersize=30, color='#fd8c00', alpha=0.6, linewidth=0.3)
# set labels
ax.set_xlabel('Percentage', fontsize=15)
ax.set_ylabel('')
# set axis
ax.tick_params(axis='both', which='major', labelsize=14)
plt.yticks(my_range, df.index)
ax.set_xlim(0,30)
x_space = 0.4
y_space = 0.05
for y_i, val in enumerate(value, 1):
ax.text(x = val - x_space, y = y_i - y_space, s = f'{val:>5.2f}%', fontsize = 7)
plt.show()
Same code as above, but with increased circle radius and font, in order to improve readability.
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Make some data
index=['Stream flow',
'Soil moisture',
'Water indices',
'Others',
'Temperature',
'Precipitation',
'Vegetative indices']
value=[2.13, 6.38, 10.64, 12.77, 17.73, 21.99, 28.37]
# create dataframe
percentages = pd.Series(value,index=index)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')
# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))
fig, ax = plt.subplots(figsize=(15,8))
# create for each expense type an horizontal line that starts at x = 0 with the length
plt.hlines(y=my_range, xmin=0, xmax=df['percentage']-0.85, color='black', alpha=0.8, linewidth=1)
# create for each expense type a dot at the level of the expense percentage value
line=plt.plot(df['percentage'], my_range, "o", markersize=50, color='#fd8c00', alpha=0.6, linewidth=0.3)
# set labels
ax.set_xlabel('Percentage', fontsize=15)
ax.set_ylabel('')
# set axis
ax.tick_params(axis='both', which='major', labelsize=14)
plt.yticks(my_range, df.index)
ax.set_xlim(0,30)
ax.set_ylim(0, len(value) + 1)
x_space = 0.75
y_space = 0.06
fontsize = 12
for y_i, val in enumerate(value, 1):
ax.text(x = val - x_space, y = y_i - y_space, s = f'{val:>5.2f}%', fontsize = fontsize)
plt.show()
Even better, you can use matplotlib.axes.Axes.annotate to get rid of x_space and y_space:
fontsize = 12
for y_i, x_i in enumerate(value, 1):
ax.annotate(f'{x_i:>5.2f}%', xy = (x_i, y_i), xytext = (0, 0), textcoords = 'offset points', ha = 'center', va = 'center', fontsize = fontsize)
You still have to adjust the fontsize to properly fit the radius of the circles.
My end goal is to create a graph that quickly communicates that two data points are between their respective bounds. I could instead of having this information on one graph, create two separate graphs; the chart the data and illustrate the bounds with horizontal lines. If I could have it so that this basic function is done with one graph, it would be much more elegant.
Is there some method I can use to sync the two y-axes so that a certain value A1 on y-axis 1 and A2 on y-axis 2 appear on the same place vertically within the graph, while at the same time, ensuring that another certain value B1 on y-axis 1 and B2 on y-axis 2appears on a separate distinct place vertically within the graph?
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
### Generate linear data
Temp = 20
pH = 6
DataCopy = pd.DataFrame({'Temp': [], 'pH': [], 'Time': []})
for i in range(10):
DataTime = datetime.datetime.now()
DataCopy = DataCopy.append({'Temp': Temp, 'pH': pH, 'Time': DataTime}, ignore_index=True)
Temp += (-0.5)
pH += (0.2)
### Plot data unto graph w/ double y-axis
sns.lineplot(data=DataCopy, x='Time', y='pH', color = 'red', label = 'Temp')
ax2 = plt.twinx()
sns.lineplot(data=DataCopy, x='Time', y='Temp', color = 'blue', label = 'pH', ax=ax2)
plt.legend()
plt.show()
How this implementation would look if done on separate graphs:
Desired effect:
You seem to want to align two positions on the left axis with two positions on the right axis.
The following approach measures the distance factor of the lower limit vs the two lines, for both axes. Then it applies the largest factor to the axis with the lowest factor. The analog happens for the upper limit of the axes.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
np.random.seed(2021)
ph_low, ph_high = 7, 9
temp_low, temp_high = 12, 18
data_copy = pd.DataFrame({'Temp': np.random.normal(0.04, 0.4, 100).cumsum() + 10,
'pH': np.random.normal(0.02, 0.2, 100).cumsum() + 6,
'Time': pd.date_range('20211211 08:00:00', freq='1min', periods=100)})
plt.figure(figsize=(12, 5))
ax1 = sns.lineplot(data=data_copy, x='Time', y='pH', color='red', label='Temp')
ax1.axhline(ph_low, color='red', ls=(0, (5, 5, 0)))
ax1.axhline(ph_high, color='red', ls=(0, (5, 5, 0)))
ax2 = ax1.twinx()
sns.lineplot(data=data_copy, x='Time', y='Temp', color='blue', label='pH', ax=ax2)
ax2.axhline(temp_low, color='blue', ls=(0, (0, 5, 5)))
ax2.axhline(temp_high, color='blue', ls=(0, (0, 5, 5)))
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax1.legend_.remove()
ax2.legend(handles=handles1 + handles2, labels=labels1 + labels2)
ymin1, ymax1 = ax1.get_ylim()
ymin2, ymax2 = ax2.get_ylim()
fymin1 = (ph_low - ymin1) / (ph_high - ph_low)
fymin2 = (temp_low - ymin2) / (temp_high - temp_low)
if fymin1 < fymin2: # move ymin1 using fymin2
ymin1 = ph_low - fymin2 * (ph_high - ph_low)
else: # move ymin2 using fymin1
ymin2 = temp_low - fymin1 * (temp_high - temp_low)
fymax1 = (ymax1 - ph_high) / (ph_high - ph_low)
fymax2 = (ymax2 - temp_high) / (temp_high - temp_low)
if fymax1 < fymax2: # move ymax1 using fymax2
ymax1 = ph_high + fymax2 * (ph_high - ph_low)
else: # move ymax2 using fymax1
ymax2 = temp_high + fymax1 * (temp_high - temp_low)
ax1.set_ylim(ymin1, ymax1)
ax2.set_ylim(ymin2, ymax2)
plt.show()
I plot a chart with matplotlib but the x-ticks are too crowded. May I know any solution to fix it?
from pandas_datareader import data
import datetime
tickers = 'AAPL'
dateToday = datetime.datetime.today().strftime("%Y-%m-%d")#年月日20190526
# Only get the adjusted close.
tickers_data = data.DataReader(tickers,
start='',
end=dateToday,
data_source='yahoo')[["Adj Close", "Volume"]][-250:]
returns = tickers_data.pct_change()
plt.figure(figsize=(12,6))
ax = sns.barplot(x=returns.index.strftime('%d/%-m'), y=returns['Adj Close'], color='#73a9d1')
plt.xticks(rotation = 90)
plt.title('Returns' + '\n' + tickers)
Output:
If you want to see, for example, every fifth x-tick (x-tick step is 5), you can improve your code in this way:
step = 5
x_values = returns.index.strftime('%d/%-m')
x_ticks_values = x_values[::step]
plt.figure(figsize = (12, 6))
ax = sns.barplot(x = x_values,
y = returns['Adj Close'],
color = '#73a9d1')
plt.xticks(ticks = np.arange(0, (len(x_values) + step), step),
labels = x_ticks_values,
rotation = 90)
I have two numeric arrays of equal length, with one array always having the element value >= to the corresponding (same index) element in the second array.
I am trying to visualize in a single graph:
i) difference between the corresponding elements,
ii) values of the corresponding elements in the two arrays.
I have tried plotting the CDF as below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
arr1 = np.random.uniform(1,20,[25,1])
arr2 = arr1 + np.random.uniform(1,10,[25,1])
df1 = pd.DataFrame(arr1)
df2 = pd.DataFrame(arr2)
fix, ax = plt.subplots()
sns.kdeplot(df1[0], cumulative=True, color='orange', label='arr1')
sns.kdeplot(df2[0], cumulative=True, color='b', label='arr2')
sns.kdeplot(df2[0]-df1[0], cumulative=True, color='r', label='difference')
plt.show()
which gives the following output:
However, it does not capture the difference, and values of the corresponding elements together. For example, suppose the difference between two elements is 3. The two numbers can be 2 and 5, but they can also be 15 and 18, and this can not be determined from the CDF.
Which kind of plotting can visualize both the difference between the elements and the values of the elements?
I do not wish to line plot as below because not much statistical insights can be derived from the visualization.
ax.plot(df1[0])
ax.plot(df2[0])
ax.plot(df2[0]-df1[0])
There are lots of ways to show difference between two values. It really depends on your goal for the chart, how quantitative or qualitative you want to be, or if you want to show the raw data somehow. Here are a few ideas that come to mind that do not involve simple line plots or density functions. I strongly recommend the book Better Data Visualization by Johnathan Schwabish. He discusses interesting considerations regarding data presentation.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import ticker
arr1 = np.random.uniform(1,20, size=25)
arr2 = arr1 + np.random.uniform(1,10, size=25)
df = pd.DataFrame({
'col1' : arr1,
'col2' : arr2
})
df['diff'] = df.col2 - df.col1
df['sum'] = df.col1 + df.col2
fig, axes = plt.subplots(ncols=2, nrows=3, figsize=(15,15))
axes = axes.flatten()
# Pyramid chart
df_sorted = df.sort_values(by='sum', ascending=True)
axes[0].barh(
y = np.arange(1,26),
width = -df_sorted.col1
)
axes[0].barh(
y = np.arange(1,26),
width = df_sorted.col2
)
# Style axes[0]
style_func(axes[0], 'Pyramid Chart')
# Dot Plot
axes[1].scatter(df.col1, np.arange(1, 26), label='col1')
axes[1].scatter(df.col2, np.arange(1, 26), label='col2')
axes[1].hlines(
y = np.arange(1, 26),
xmin = df.col1, xmax = df.col2,
zorder=0, linewidth=1.5, color='k'
)
# Style axes[1]
legend = axes[1].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[1], 'Dot Plot')
set_xlim = axes[1].set_xlim(0,25)
# Dot Plot 2
df_sorted = df.sort_values(by=['col1', 'diff'], ascending=False)
axes[2].scatter(df_sorted.col1, np.arange(1, 26), label='col1')
axes[2].scatter(df_sorted.col2, np.arange(1, 26), label='col2')
axes[2].hlines(
y = np.arange(1, 26),
xmin = df_sorted.col1, xmax = df_sorted.col2,
zorder=0, linewidth=1.5, color='k'
)
# Style axes[2]
legend = axes[2].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[2], 'Dot Plot')
set_xlim = axes[2].set_xlim(0,25)
# Dot Plot 3
df_sorted = df.sort_values(by='sum', ascending=True)
axes[3].scatter(-df_sorted.col1, np.arange(1, 26), label='col1')
axes[3].scatter(df_sorted.col2, np.arange(1, 26), label='col2')
axes[3].vlines(x=0, ymin=-1, ymax=27, linewidth=2.5, color='k')
axes[3].hlines(
y = np.arange(1, 26),
xmin = -df_sorted.col1, xmax = df_sorted.col2,
zorder=0, linewidth=2
)
# Style axes[3]
legend = axes[3].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[3], 'Dot Plot')
# Strip plot
axes[4].scatter(df.col1, [4] * 25)
axes[4].scatter(df.col2, [6] * 25)
axes[4].set_ylim(0, 10)
axes[4].vlines(
x = [df.col1.mean(), df.col2.mean()],
ymin = [3.5, 5.5], ymax=[4.5,6.5],
color='black', linewidth =2
)
# Style axes[4]
axes[4].yaxis.set_major_locator(ticker.FixedLocator([4,6]))
axes[4].yaxis.set_major_formatter(ticker.FixedFormatter(['col1','col2']))
hide_spines = [axes[4].spines[x].set_visible(False) for x in ['left','top','right']]
set_title = axes[4].set_title('Strip Plot', fontweight='bold')
tick_params = axes[4].tick_params(axis='y', left=False)
grid = axes[4].grid(axis='y', dashes=(8,3), alpha=0.3, color='gray')
# Slope chart
for i in range(25):
axes[5].plot([0,1], [df.col1[i], df.col2[i]], color='k')
align = ['left', 'right']
for i in range(1,3):
axes[5].text(x = i - 1, y = 0, s = 'col' + str(i),
fontsize=14, fontweight='bold', ha=align[i-1])
set_title = axes[5].set_title('Slope chart', fontweight='bold')
axes[5].axis('off')
def style_func(ax, title):
hide_spines = [ax.spines[x].set_visible(False) for x in ['left','top','right']]
set_title = ax.set_title(title, fontweight='bold')
set_xlim = ax.set_xlim(-25,25)
x_locator = ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
y_locator = ax.yaxis.set_major_locator(ticker.FixedLocator(np.arange(1,26, 2)))
spine_width = ax.spines['bottom'].set_linewidth(1.5)
x_tick_params = ax.tick_params(axis='x', length=8, width=1.5)
x_tick_params = ax.tick_params(axis='y', left=False)
What about a parallel coordinates plot with plotly? This will allow to see the distinct values of each original array but then also if they converge on the same diffrence?
https://plot.ly/python/parallel-coordinates-plot/
I am using the code (posted here: https://stackoverflow.com/a/22845857/6649485) below to generate a clustered stacked bar plot. Unfortunately the error bars are not shifted in the same way as the data bars. I am not sure how to adress them and set their x-value accordingly.
def plot_clustered_stacked(dfall, labels=None, title="SEC stress study", H="/", **kwargs):
"""Given a list of dataframes, with identical columns and index, create a clustered stacked bar plot. labels is a list of the names of the dataframe, used for the legend title is a string for the title of the plot H is the hatch used for identification of the different dataframe"""
n_df = len(dfall)
n_col = len(dfall[0].columns)
n_ind = len(dfall[0].index)
axe = plt.subplot(111)
for df in dfall : # for each data frame
axe = df.plot(kind="bar",
linewidth=0,
stacked=True,
ax=axe,
legend=False,
grid=False,
yerr=0.1,
**kwargs) # make bar plots
h,l = axe.get_legend_handles_labels() # get the handles we want to modify
for i in range(0, n_df * n_col, n_col): # len(h) = n_col * n_df
for j, pa in enumerate(h[i:i+n_col]):
for rect in pa.patches: # for each index
rect.set_x(rect.get_x() + 1 / float(n_df + 1) * i / float(n_col))
rect.set_hatch(H * int(i / n_col)) #edited part
rect.set_width(1 / float(n_df + 1))
axe.set_xticks((np.arange(0, 2 * n_ind, 2) + 1 / float(n_df + 1)) / 2.)
axe.set_xticklabels(df.index, rotation = 0)
axe.set_title(title)
# Add invisible data to add another legend
n=[]
for i in range(n_df):
n.append(axe.bar(0, 0, color="gray", hatch=H * i))
l1 = axe.legend(h[:n_col], l[:n_col], loc=[1.01, 0.5])
if labels is not None:
l2 = plt.legend(n, labels, loc=[1.01, 0.1])
axe.add_artist(l1)
return axe
This is what I came up with by now.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
def plot_clustered_stacked(dfall):
for j, df in enumerate(dfall):
set_count=1
width = 0.2
N=len(df.index)
b_width = 0.2
index=np.arange(N/set_count)
labels=df.index
p1 = plt.bar(index +j*b_width, df['Average Monomer Area'], width=width, yerr='0.5 Stdev Monomer Area', data=df)
p2 = plt.bar(index +j*b_width, df['Average HMW Area'], bottom=df['Average Monomer Area'], width=width, yerr='0.5 Stdev HMW Area', data=df)
plt.xticks(index+b_width, labels)
plt.legend((p1[0], p2[0]), ('Average Monomer Area', 'Average
HMW Area'))
data=pd.read_csv("SEC.csv")
df1 = data[data['Time'].str.contains("0 weeks")].drop(['High/low', 'Time'], axis=1)
df1.set_index('Excipient Name', inplace=True)
df2 = data[data['Time'].str.contains("1 week")].drop(['High/low', 'Time'], axis=1)
df2.set_index('Excipient Name', inplace=True)
df3 = data[data['Time'].str.contains("3 weeks")].drop(['High/low', 'Time'], axis=1)
df3.set_index('Excipient Name', inplace=True)
plot_clustered_stacked([df1, df2, df3])
plt.show()