Plotting difficulty combining 3 variables and repositioning the legends in matplotlib

Plotting difficulty combining 3 variables and repositioning the legends in matplotlib - python

I have data where I have names, proportions and total. I want to show all 3 variables in one plot. Ideally I want to have everything like plot 1 but inside I want to show total as in plot 2
In first plot I don't get line right also this is not my plot of choice.
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
df = pd.DataFrame({"name": list("ABCDEFGHIJ"), "proportion": [0.747223, 0.785883, 0.735542, 0.817368, 0.565193, 0.723029, 0.723004, 0.722595, 0.783929, 0.55152],
"total": [694327, 309681, 239384, 201646, 192267, 189399, 181974, 163483, 157902, 153610]})
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
sns.barplot(data=df, x="name", y="total", color="lightblue", ax=ax1)
sns.lineplot(data=df, x="name", y= "proportion", color="black", lw=3, ls="--", ax=ax2)
# Plot the figure.
df["male"] = df.proportion * df.total
ax = sns.barplot(data = df, x= "name", y = 'total', color = "lightblue")
sns.barplot(data = df, x="name", y = "male", color = "blue", ax = ax)
ax.set_ylabel("male/no_of_streams")
Is there a way I can achieve my goal of effective plot where
I can show total split
I also want to add proportions values to plot as well
Any help would be appreciated
Thanks in advance

If my understanding is right, for the first plot, I guess you wanna to know why the line is dashed. Just remove argument ls="--", you will get solid line.
The second, following code can work, if you want percentage of "man-number" / "total". If the percentage is computed using other numbers, you can adjust the equation in the for statement:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
if __name__ == '__main__':
df = pd.DataFrame({"name": list("ABCDEFGHIJ"), "proportion": [0.747223, 0.785883, 0.735542, 0.817368, 0.565193, 0.723029, 0.723004, 0.722595, 0.783929, 0.55152], "total": [694327, 309681, 239384, 201646, 192267, 189399, 181974, 163483, 157902, 153610]})
# fig, ax1 = plt.subplots()
# ax2 = ax1.twinx()
# sns.barplot(data=df, x="name", y="total", color="lightblue", ax=ax1)
# # remove ls='--'
# sns.lineplot(data=df, x="name", y="proportion", color="black", lw=3, ax=ax2)
# Plot the figure.
df["male"] = df.proportion * df.total
ax = sns.barplot(data = df, x= "name", y = 'total', color = "lightblue")
sns.barplot(data = df, x="name", y = "male", color = "blue", ax = ax)
ax.set_ylabel("proportion(male/no_of_streams)")
# this is code block to add percentage
for i, v in enumerate(df['proportion']):
p = ax.patches[i]
height = p.get_height()
ax.text(p.get_x()+p.get_width()/2.,
height + 3,
'{:1.0f}%'.format(v * 100),
ha="center")
plt.show()
BTW, I learn at this page, FYI.

Related

Combine Binned barplot with lineplot

I'd like to represent two datasets on the same plot, one as a line as one as a binned barplot. I can do each individually:
tobar = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
tobar["bins"] = pd.qcut(tobar.index, 20)
bp = sns.barplot(data=tobar, x="bins", y="value")
toline = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
lp = sns.lineplot(data=toline, x=toline.index, y="value")
But when I try to combine them, of course the x axis gets messed up:
fig, ax = plt.subplots()
ax2 = ax.twinx()
bp = sns.barplot(data=tobar, x="bins", y="value", ax=ax)
lp = sns.lineplot(data=toline, x=toline.index, y="value", ax=ax2)
bp.set(xlabel=None)
I also can't seem to get rid of the bin labels.
How can I get these two informations on the one plot?

This answer explains why it's better to plot the bars with matplotlib.axes.Axes.bar instead of sns.barplot or pandas.DataFrame.bar.
In short, the xtick locations correspond to the actual numeric value of the label, whereas the xticks for seaborn and pandas are 0 indexed, and don't correspond to the numeric value.
This answer shows how to add bar labels.
ax2 = ax.twinx() can be used for the line plot if needed
Works the same if the line plot is different data.
Tested in python 3.11, pandas 1.5.2, matplotlib 3.6.2, seaborn 0.12.1
Imports and DataFrame
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# test data
np.random.seed(2022)
df = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
# create the bins
df["bins"] = pd.qcut(df.index, 20)
# add a column for the mid point of the interval
df['mid'] = df.bins.apply(lambda row: row.mid.round().astype(int))
# pivot the dataframe to calculate the mean of each interval
pt = df.pivot_table(index='mid', values='value', aggfunc='mean').reset_index()
Plot 1
# create the figure
fig, ax = plt.subplots(figsize=(30, 7))
# add a horizontal line at y=0
ax.axhline(0, color='black')
# add the bar plot
ax.bar(data=pt, x='mid', height='value', width=4, alpha=0.5)
# set the labels on the xticks - if desired
ax.set_xticks(ticks=pt.mid, labels=pt.mid)
# add the intervals as labels on the bars - if desired
ax.bar_label(ax.containers[0], labels=df.bins.unique(), weight='bold')
# add the line plot
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')
Plot 2
fig, ax = plt.subplots(figsize=(30, 7))
ax.axhline(0, color='black')
ax.bar(data=pt, x='mid', height='value', width=4, alpha=0.5)
ax.set_xticks(ticks=pt.mid, labels=df.bins.unique(), rotation=45)
ax.bar_label(ax.containers[0], weight='bold')
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')
Plot 3
The bar width is the width of the interval
fig, ax = plt.subplots(figsize=(30, 7))
ax.axhline(0, color='black')
ax.bar(data=pt, x='mid', height='value', width=50, alpha=0.5, ec='k')
ax.set_xticks(ticks=pt.mid, labels=df.bins.unique(), rotation=45)
ax.bar_label(ax.containers[0], weight='bold')
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')

Put two y-axes on one graph

I am trying to put a y-axis on the right and left side of a graph. I am using pandas where I have a data frame take a certain range in an excel sheet and graph it out. The code is able to plot out the three columns that I want vs y however I'm confused on how to get the PM3 scatter plot (ax2) on the right side while keeping the PM1 and AFS scatter plot (ax1 and ax3) on the left. I tried using twinx() and other commands but it doesn't work how I want it. Any suggestions?
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
testproject = r"C:\Users\223070186\Documents\PleaseWork.xlsx"
var = pd.read_excel(testproject, sheet_name ="Test1")
df = pd.DataFrame(var, columns = ["Time", "PM1", "PM3", "AFS"])
df2 = df.iloc[1108:1142, 0:4]
ax1 = df2.plot(kind = "scatter", x = "Time", y = "PM1", color = "r")
ax2 = df2.plot(kind = "scatter", x="Time", y = "PM3", color = "purple", ax =ax1)
ax3 = df2.plot(kind = "scatter", x = "Time", y= "AFS", color = "orange", ax = ax2)
plt.xlabel("Time")
plt.ylabel("PM1, PM3, AFS")
plt.title("Time vs PM1, PM3, AFS splits")
plt.show(ax1 == ax2 == ax3)

matplotlib multiple Y-axis pandas plot

Could someone give me a tip on how to do multiple Y axis plots?
This is some made up data below, how could I put Temperature its own Y axis, Pressure on its own Y axis, and then have both Value1 and Value2 on the same Y axis. I am trying to go for the same look and feel of this SO post answer. Thanks for any tips, I don't understand ax3 = ax.twinx() process, like as far as do I need to define an ax.twinx() for each separate Y axis plot I need?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
rows,cols = 8760,4
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(data, columns=['Temperature','Value1','Pressure','Value2'], index=tidx)
# using subplots() function
fig, ax = plt.subplots(figsize=(25,8))
plt.title('Multy Y Plot')
ax2 = ax.twinx()
ax3 = ax.twinx()
ax4 = ax.twinx()
plot1, = ax.plot(df.index, df.Temperature)
plot2, = ax2.plot(df.index, df.Value1, color = 'r')
plot3, = ax3.plot(df.index, df.Pressure, color = 'g')
plot4, = ax4.plot(df.index, df.Value2, color = 'b')
ax.set_xlabel('Date')
ax.set_ylabel('Temperature')
ax2.set_ylabel('Value1')
ax3.set_ylabel('Pressure')
ax4.set_ylabel('Value2')
plt.legend([plot1,plot2,plot3,plot4],list(df.columns))
# defining display layout
plt.tight_layout()
# show plot
plt.show()
This will output everything jumbled up on the same side without separate Y axis for Pressure, Value1, and Value2.

You are adding 4 different plots in one, which is not helpful. I would recommend breaking it into 2 plots w/ shared x-axis "Date":
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
rows,cols = 8760,4
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(data, columns=['Temperature','Value1','Pressure','Value2'], index=tidx)
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(25,8))
plt.title('Multy Y Plot')
ax1b = ax1.twinx()
plot1a, = ax1.plot(df.index, df.Temperature)
plot1b, = ax1b.plot(df.index, df.Pressure, color='r')
ax1.set_ylabel('Temperature')
ax1b.set_ylabel('Pressure')
ax2b = ax2.twinx()
plot2a, = ax2.plot(df.index, df.Value1, color='k')
plot2b, = ax2b.plot(df.index, df.Value2, color='g')
ax2.set_xlabel('Date')
ax2.set_ylabel('Value1')
ax2b.set_ylabel('Value2')
plt.legend([plot1a, plot1b, plot2a, plot2b], df.columns)
# defining display layout
plt.tight_layout()
# show plot
plt.show()
Here I have added in the first plot (on the top) Temperature and Pressure and on the second plot (on the bottom) Value 1 and Value 2. Normally, we add in the same plot things that make sense to compare on the same x-axis. Pressure and Temperature is a valid combination that is why I combined those two together. But you can do as you wish.

This answer below uses mpatches is how to make the subplot of Value1 and Value2 on the same axis. The solution for this post has subplot for Value1 and Value2 on different axis. Thanks for the help #tzinie!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
rows,cols = 8760,4
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(data, columns=['Temperature','Value1','Pressure','Value2'], index=tidx)
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(25,8))
plt.title('Multy Y Plot')
ax1b = ax1.twinx()
plot1a, = ax1.plot(df.index, df.Temperature, color='r') # red
plot1b, = ax1b.plot(df.index, df.Pressure, color='b') # blue
ax1.set_ylabel('Temperature')
ax1b.set_ylabel('Pressure')
ax2.plot(df.index, df.Value1, color='k') # black
ax2.plot(df.index, df.Value2, color='g') # green
ax2.set_xlabel('Date')
ax2.set_ylabel('Value1 & Value2')
red_patch = mpatches.Patch(color='red', label='Temperature')
blue_patch = mpatches.Patch(color='blue', label='Pressure')
green_patch = mpatches.Patch(color='green', label='Value2')
black_patch = mpatches.Patch(color='black', label='Value1')
plt.legend(handles=[red_patch,blue_patch,green_patch,black_patch])
# defining display layout
#plt.tight_layout()
# show plot
plt.show()

Matplotlib subplot title, figure title formatting

How would I go about formatting the below pie chart subplots so that there is more white-space between the fig title and subplot titles. Ideally the subplot titles would also be in closer vicinity to the actual pie chart itself.
I can't seem to find anything in the docs which might enable this, but I'm new to matplotlib.
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
m = {"Men" : {"Yes": 2, "No": 8}}
w = {"Women": {"Yes": 3, "No": 7}}
data = {**m, **w}
df = DataFrame(data)
fig, axes = plt.subplots(1, len(df.columns))
fig.suptitle("Would you prefer to work from home?", fontsize=18)
logging.debug("fig.axes: {}".format(fig.axes))
for i, ax in enumerate(fig.axes):
col = df.ix[:, i]
ax = fig.axes[i]
pcnt_col = col / col.sum() * 100
ax.set_title("{} (n={})".format(pcnt_col.name, col.sum()))
ax.pie(pcnt_col.values, labels=pcnt_col.index,
autopct="%1.1f%%", startangle=90)
ax.axis("equal")
plt.legend(loc="lower right", title="Answer", fancybox=True,
ncol=1, shadow=True)
plt.show()

Use subplots_adjust to separate the two
plt.subplots_adjust(top=0.75)
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
m = {"Men" : {"Yes": 2, "No": 8}}
w = {"Women": {"Yes": 3, "No": 7}}
data = {**m, **w}
df = DataFrame(data)
fig, axes = plt.subplots(1, len(df.columns))
fig.suptitle("Would you prefer to work from home?", fontsize=18)
logging.debug("fig.axes: {}".format(fig.axes))
for i, ax in enumerate(fig.axes):
col = df.ix[:, i]
ax = fig.axes[i]
pcnt_col = col / col.sum() * 100
ax.set_title("{} (n={})".format(pcnt_col.name, col.sum()))
ax.pie(pcnt_col.values, labels=pcnt_col.index,
autopct="%1.1f%%", startangle=90)
ax.axis("equal")
plt.legend(loc="lower right", title="Answer", fancybox=True,
ncol=1, shadow=True)
plt.subplots_adjust(top=0.55)
plt.show()

Merge matplotlib subplots with shared x-axis

I have two graphs to where both have the same x-axis, but with different y-axis scalings.
The plot with regular axes is the data with a trend line depicting a decay while the y semi-log scaling depicts the accuracy of the fit.
fig1 = plt.figure(figsize=(15,6))
ax1 = fig1.add_subplot(111)
# Plot of the decay model
ax1.plot(FreqTime1,DecayCount1, '.', color='mediumaquamarine')
# Plot of the optimized fit
ax1.plot(x1, y1M, '-k', label='Fitting Function: $f(t) = %.3f e^{%.3f\t} \
%+.3f$' % (aR1,kR1,bR1))
ax1.set_xlabel('Time (sec)')
ax1.set_ylabel('Count')
ax1.set_title('Run 1 of Cesium-137 Decay')
# Allows me to change scales
# ax1.set_yscale('log')
ax1.legend(bbox_to_anchor=(1.0, 1.0), prop={'size':15}, fancybox=True, shadow=True)
Now, i'm trying to figure out to implement both close together like the examples supplied by this link
http://matplotlib.org/examples/pylab_examples/subplots_demo.html
In particular, this one
When looking at the code for the example, i'm a bit confused on how to implant 3 things:
1) Scaling the axes differently
2) Keeping the figure size the same for the exponential decay graph but having a the line graph have a smaller y size and same x size.
For example:
3) Keeping the label of the function to appear in just only the decay graph.
Any help would be most appreciated.

Look at the code and comments in it:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import gridspec
# Simple data to display in various forms
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig = plt.figure()
# set height ratios for subplots
gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])
# the first subplot
ax0 = plt.subplot(gs[0])
# log scale for axis Y of the first subplot
ax0.set_yscale("log")
line0, = ax0.plot(x, y, color='r')
# the second subplot
# shared axis X
ax1 = plt.subplot(gs[1], sharex = ax0)
line1, = ax1.plot(x, y, color='b', linestyle='--')
plt.setp(ax0.get_xticklabels(), visible=False)
# remove last tick label for the second subplot
yticks = ax1.yaxis.get_major_ticks()
yticks[-1].label1.set_visible(False)
# put legend on first subplot
ax0.legend((line0, line1), ('red line', 'blue line'), loc='lower left')
# remove vertical gap between subplots
plt.subplots_adjust(hspace=.0)
plt.show()

Here is my solution:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig, (ax1,ax2) = plt.subplots(nrows=2, sharex=True, subplot_kw=dict(frameon=False)) # frameon=False removes frames
plt.subplots_adjust(hspace=.0)
ax1.grid()
ax2.grid()
ax1.plot(x, y, color='r')
ax2.plot(x, y, color='b', linestyle='--')
One more option is seaborn.FacetGrid but this requires Seaborn and Pandas libraries.

Here are some adaptions to show how the code could work to add a combined legend when plotting a pandas dataframe. ax=ax0 can be used to plot on a given ax and ax0.get_legend_handles_labels() gets the information for the legend.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
dates = pd.date_range('20210101', periods=100, freq='D')
df0 = pd.DataFrame({'x': np.random.normal(0.1, 1, 100).cumsum(),
'y': np.random.normal(0.3, 1, 100).cumsum()}, index=dates)
df1 = pd.DataFrame({'z': np.random.normal(0.2, 1, 100).cumsum()}, index=dates)
fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, gridspec_kw={'height_ratios': [2, 1], 'hspace': 0})
df0.plot(ax=ax0, color=['dodgerblue', 'crimson'], legend=False)
df1.plot(ax=ax1, color='limegreen', legend=False)
# put legend on first subplot
handles0, labels0 = ax0.get_legend_handles_labels()
handles1, labels1 = ax1.get_legend_handles_labels()
ax0.legend(handles=handles0 + handles1, labels=labels0 + labels1)
# remove last tick label for the second subplot
yticks = ax1.get_yticklabels()
yticks[-1].set_visible(False)
plt.tight_layout()
plt.show()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Plotting difficulty combining 3 variables and repositioning the legends in matplotlib - python

Related

Combine Binned barplot with lineplot

Put two y-axes on one graph

matplotlib multiple Y-axis pandas plot

Matplotlib subplot title, figure title formatting

Merge matplotlib subplots with shared x-axis

Categories

Resources