Creating legend in matplotlib after plotting two Pandas Series - python

I plotted two Pandas Series from the same DataFrame with the same x axis and everything worked out fine. However, when I tried to manually create a Legend, it appears but only with the title and not with the actually content. I've tried other solutions without any luck. Here's my code:
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax2 = ax1.twinx()
width = .3
df.tally.plot(kind='bar', color='red', ax=ax1, width=width, position=1, grid=False)
df.costs.plot(kind='bar', color='blue', ax=ax2, width=width, position=0, grid=True)
ax1.set_ylabel('Tally')
ax2.set_ylabel('Total Cost')
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
plt.legend([handles1, handles2], [labels1, labels2], loc='upper left', title='Legend')
plt.show()
plt.clf()

Maybe you have a good reason to do it your way, but if not, this is much easier:
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Optional, just better looking
import seaborn as sns
# Generate random data
df = pd.DataFrame(np.random.randn(10,3), columns=['tally', 'costs', 'other'])
df[['tally', 'costs']].plot(kind='bar', width=.3)
plt.show();
Out[1]:
Edit
After learning that this is because you have a much different scale for the other one, here's the pandas approach:
# Generate same data as Jianxun Li
np.random.seed(0)
df = pd.DataFrame(np.random.randint(50,100,(20,3)), columns=['tally', 'costs', 'other'])
df.costs = df.costs * 5
width = .3
df.tally.plot(kind='bar', color='#55A868', position=1, width=width, legend=True, figsize=(12,6))
df.costs.plot(kind='bar', color='#4C72B0', position=0, width=width, legend=True, secondary_y=True)
plt.show();

Something like this?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# your data
# ===============================
np.random.seed(0)
df = pd.DataFrame(np.random.randint(50,100,(20,3)), columns=['col1', 'col2', 'col3'])
df.col2 = df.col2 * 5
# bar plot with twinx
# ===============================
fig, ax = plt.subplots()
width=0.3
ax.bar(df.index, df.col1, width=width, color='red', label='col1_data')
ax.legend(loc='best')
ax2 = ax.twinx()
ax2.bar(df.index+width, df.col2, width=width, color='blue', label='col2_data')
ax2.legend(loc='best')

Related

matplotlib multiple Y-axis pandas plot

Could someone give me a tip on how to do multiple Y axis plots?
This is some made up data below, how could I put Temperature its own Y axis, Pressure on its own Y axis, and then have both Value1 and Value2 on the same Y axis. I am trying to go for the same look and feel of this SO post answer. Thanks for any tips, I don't understand ax3 = ax.twinx() process, like as far as do I need to define an ax.twinx() for each separate Y axis plot I need?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
rows,cols = 8760,4
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(data, columns=['Temperature','Value1','Pressure','Value2'], index=tidx)
# using subplots() function
fig, ax = plt.subplots(figsize=(25,8))
plt.title('Multy Y Plot')
ax2 = ax.twinx()
ax3 = ax.twinx()
ax4 = ax.twinx()
plot1, = ax.plot(df.index, df.Temperature)
plot2, = ax2.plot(df.index, df.Value1, color = 'r')
plot3, = ax3.plot(df.index, df.Pressure, color = 'g')
plot4, = ax4.plot(df.index, df.Value2, color = 'b')
ax.set_xlabel('Date')
ax.set_ylabel('Temperature')
ax2.set_ylabel('Value1')
ax3.set_ylabel('Pressure')
ax4.set_ylabel('Value2')
plt.legend([plot1,plot2,plot3,plot4],list(df.columns))
# defining display layout
plt.tight_layout()
# show plot
plt.show()
This will output everything jumbled up on the same side without separate Y axis for Pressure, Value1, and Value2.
You are adding 4 different plots in one, which is not helpful. I would recommend breaking it into 2 plots w/ shared x-axis "Date":
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
rows,cols = 8760,4
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(data, columns=['Temperature','Value1','Pressure','Value2'], index=tidx)
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(25,8))
plt.title('Multy Y Plot')
ax1b = ax1.twinx()
plot1a, = ax1.plot(df.index, df.Temperature)
plot1b, = ax1b.plot(df.index, df.Pressure, color='r')
ax1.set_ylabel('Temperature')
ax1b.set_ylabel('Pressure')
ax2b = ax2.twinx()
plot2a, = ax2.plot(df.index, df.Value1, color='k')
plot2b, = ax2b.plot(df.index, df.Value2, color='g')
ax2.set_xlabel('Date')
ax2.set_ylabel('Value1')
ax2b.set_ylabel('Value2')
plt.legend([plot1a, plot1b, plot2a, plot2b], df.columns)
# defining display layout
plt.tight_layout()
# show plot
plt.show()
Here I have added in the first plot (on the top) Temperature and Pressure and on the second plot (on the bottom) Value 1 and Value 2. Normally, we add in the same plot things that make sense to compare on the same x-axis. Pressure and Temperature is a valid combination that is why I combined those two together. But you can do as you wish.
This answer below uses mpatches is how to make the subplot of Value1 and Value2 on the same axis. The solution for this post has subplot for Value1 and Value2 on different axis. Thanks for the help #tzinie!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
rows,cols = 8760,4
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(data, columns=['Temperature','Value1','Pressure','Value2'], index=tidx)
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(25,8))
plt.title('Multy Y Plot')
ax1b = ax1.twinx()
plot1a, = ax1.plot(df.index, df.Temperature, color='r') # red
plot1b, = ax1b.plot(df.index, df.Pressure, color='b') # blue
ax1.set_ylabel('Temperature')
ax1b.set_ylabel('Pressure')
ax2.plot(df.index, df.Value1, color='k') # black
ax2.plot(df.index, df.Value2, color='g') # green
ax2.set_xlabel('Date')
ax2.set_ylabel('Value1 & Value2')
red_patch = mpatches.Patch(color='red', label='Temperature')
blue_patch = mpatches.Patch(color='blue', label='Pressure')
green_patch = mpatches.Patch(color='green', label='Value2')
black_patch = mpatches.Patch(color='black', label='Value1')
plt.legend(handles=[red_patch,blue_patch,green_patch,black_patch])
# defining display layout
#plt.tight_layout()
# show plot
plt.show()

barh with plot : cannot get different scale for data on secondary x axis

I cannot get two different scales for the plot:
I don't know how to activate the scale of the secondary x axis.
"STK" and "Material" are supposed to be displayed at different scales.
How to display "Material" on it's own scale (0,max) like it was done automatically for "STK"?
I need it to be displayed like on the image below :
Here's the code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = [['MPP1',400,30],['MPP2',3500,700], ['MPP3',1900,3], ['MPP4',15000,56], ['MPP5',8500,306]]
df = pd.DataFrame(df)
df.columns =['MPP', 'STK', 'Material']
plt.rcdefaults()
fig, ax = plt.subplots(constrained_layout=True)
xdata = df.STK
x2data = df.Material
ydata = df.MPP
y_pos = np.arange(len(ydata))
ax.barh(y_pos, df.STK , label='STK per MPP')
ax.invert_yaxis()
ax.plot(x2data, ydata, label='Material per MPP', color='red')
ax.set_xlabel('STK')
ax.legend()
ax2 = ax.secondary_xaxis('top')
ax2.set_xlabel('Material')
ax2.set_xticks(df.Material)
ax2.set_xticklabels(df.Material)
ax2.set_xlabel(r"Material")
plt.show()
You should create the secondary axis with:
ax2 = ax.twiny()
and plot your data on it:
ax2.plot(x2data, ydata, label='Material per MPP', color='red')
Pay attention: ax2.plot, not ax.plot.
Complete Code
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = [['MPP1',400,30],['MPP2',3500,700], ['MPP3',1900,3], ['MPP4',15000,56], ['MPP5',8500,306]]
df = pd.DataFrame(df)
df.columns =['MPP', 'STK', 'Material']
plt.rcdefaults()
fig, ax = plt.subplots(constrained_layout=True)
xdata = df.STK
x2data = df.Material
ydata = df.MPP
y_pos = np.arange(len(ydata))
ax.barh(y_pos, df.STK , label='STK per MPP')
ax.invert_yaxis()
ax.set_xlabel('STK')
leg = plt.legend()
ax2 = ax.twiny()
ax2.plot(x2data, ydata, label='Material per MPP', color='red')
ax2.set_xlabel('Material')
leg2 = plt.legend()
plt.legend(leg.get_patches()+leg2.get_lines(),
[text.get_text() for text in leg.get_texts()+leg2.get_texts()])
leg.remove()
plt.show()

Merge Count Plot and Mean in same plot SNS

I am trying to create a count plot and also add another plot on it which would actually be the mean of the other columns.
The sample data is in the below link:
Sample Data
I have used the below code to create the sns count plot:
df = pd.read_csv("latestfile.csv")
df.sort_values(by=["Business"],inplace=True)
sns.countplot(data=df,x=df["Business"],hue="location")
and I generate the below:
Now I use the groupby and use the below code to get the desired data:
dfg = df.groupby(["Business","location"])['Ageing'].mean().reset_index()
dfg.set_index("Business",inplace=True)
but how do I plot this on the same count plot on the different y axis.
Unable to think of a way to do it.
Below is what I am finally looking for:
Of course, you can squeeze another bar plot into the countplot graph:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
df = pd.read_csv("test.csv")
df.sort_values(by=["Business"],inplace=True)
ax1 = sns.countplot(data=df, x="Business", hue="location", palette="muted", edgecolor="black")
for patch in ax1.patches:
patch.set_x(patch.get_x() + 0.3 * patch.get_width())
ax1.legend(title="Count")
ax2 = ax1.twinx()
sns.barplot(data=df, x="Business", y="Ageing", hue="location", palette="bright", ci=None, ax=ax2, edgecolor="white")
ax2.legend(title="Ageing")
ax1.autoscale_view()
plt.show()
However, I would definitely prefer two subplots:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
df = pd.read_csv("test.csv")
df.sort_values(by=["Business"],inplace=True)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
sns.countplot(data=df, x="Business", hue="location", ax=ax1)
ax1.legend(title="Count")
sns.barplot(data=df, x="Business", y="Ageing", hue="location", ci=None, ax=ax2)
ax2.legend(title="Ageing")
plt.show()
Since you prefer now the distribution, you can combine the countplot with a stripplot:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
df = pd.read_csv("test.csv")
df.sort_values(by=["Business"],inplace=True)
ax1 = sns.countplot(data=df, x="Business", hue="location")
ax2 = ax1.twinx()
sns.stripplot(data=df, x="Business", y="Ageing", hue="location", jitter=True, dodge=True, ax=ax2, linewidth=1)
ax1.legend(title="", loc="upper center")
ax2.legend_.remove()
plt.show()

Share X axis between line and bar plot in Python's Matplotlib

I have the following script for generating a figure with two subplots: one line plot, and one bar plot.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
plt.close('all')
np.random.seed(42)
n = 1000
idx = pd.date_range(end='2020-02-27', periods=n)
df = pd.Series(np.random.randint(-5, 5, n),
index=idx)
curve = df.cumsum()
bars = df.resample('M').sum()
fig = plt.figure()
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)
curve.plot(ax=ax1)
bars.plot(kind='bar', ax=ax2)
fig.set_tight_layout(True)
I would like to share the x axis between the two subplots, however the command ax2 = fig.add_subplot(212, sharex=ax1) will result in an empty graph for the line plot like the following figure.
Here is my version based on Matplotlib (without pandas api for plotting), may be it would be helpful.
I explicitly set the width of bars.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
plt.close('all')
np.random.seed(42)
n = 1000
idx = pd.date_range(end='2020-02-27', periods=n)
df = pd.Series(np.random.randint(-5, 5, n), index=idx)
curve = df.cumsum()
bars = df.resample('M').sum()
#fig = plt.figure()
#ax1 = fig.add_subplot(211)
#ax2 = fig.add_subplot(212)
#curve.plot(ax=ax1)
#bars.plot(kind='bar', ax=ax2)
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, gridspec_kw={'hspace': 0})
ax1.plot(curve.index, curve.values)
ax2.bar(bars.index, bars.values, width = (bars.index[0] - bars.index[1])/2)
fig.set_tight_layout(True)
_ = plt.xticks(bars.index, bars.index, rotation=90)

How to change border color of violin plot in pandas graph?

I want to change the color of lineborder of violinplots.
I can set lines.linewidth to 0 but I want to show borders not to hide them. How to change the color of the border?
sns.set_context("paper", rc={"lines.linewidth": 0.8})
My code is as follows:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import numpy as np
datasets = pd.read_csv("merged.csv", index_col=0);
df = datasets
df.protocol = df.protocol.astype(str)
f, ax = plt.subplots(figsize=(18, 6))
sns.violinplot(x="time",
y="values",
hue="protocol",
data=df,
bw=.5,
scale="count"
)
sns.despine(left=True)
f.suptitle('Title', fontsize=22, fontweight='bold')
ax.set_xlabel("Time",size = 16,alpha=0.7)
ax.set_ylabel("Values",size = 16,alpha=0.7)
ax.set_xticklabels(df.qber, rotation=90)
ax.grid(True)
plt.legend(loc='upper right')
plt.grid(linestyle='--', alpha=0.7)
fig = ax.get_figure()
fig.savefig('time_v.pdf', bbox_inches='tight')
Thank you!
this should be very close to what you're looking for:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import numpy as np
def patch_violinplot(palette, n):
from matplotlib.collections import PolyCollection
ax = plt.gca()
violins = [art for art in ax.get_children() if isinstance(art, PolyCollection)]
colors = sns.color_palette(palette, n_colors=n) * (len(violins)//n)
for i in range(len(violins)):
violins[i].set_edgecolor(colors[i])
datasets = pd.read_csv("merged.csv", index_col=0);
df = datasets
df.protocol = df.protocol.astype(str)
num_cols = df['protocol'].nunique()
f, ax = plt.subplots(figsize=(18, 6))
sns.violinplot(x="time",
y="values",
hue="protocol",
data=df,
bw=.5,
scale="count",
palette="deep"
)
patch_violinplot("deep", num_cols)
sns.despine(left=True)
f.suptitle('Title', fontsize=22, fontweight='bold')
ax.set_xlabel("Time",size = 16,alpha=0.7)
ax.set_ylabel("Values",size = 16,alpha=0.7)
ax.set_xticklabels(df.qber, rotation=90)
ax.grid(True)
plt.legend(loc='upper right')
plt.grid(linestyle='--', alpha=0.7)
fig = ax.get_figure()
fig.savefig('time_v.pdf', bbox_inches='tight')
The patch_violin function came from here.

Categories