Python - dual y axis chart, align zero - python

I'm trying to create a horizontal bar chart, with dual x axes. The 2 axes are very different in scale, 1 set goes from something like -5 to 15 (positive and negative value), the other set is more like 100 to 500 (all positive values).
When I plot this, I'd like to align the 2 axes so zero shows at the same position, and only the negative values are to the left of this. Currently the set with all positive values starts at the far left, and the set with positive and negative starts in the middle of the overall plot.
I found the align_yaxis example, but I'm struggling to align the x axes.
Matplotlib bar charts: Aligning two different y axes to zero
Here is an example of what I'm working on with simple test data. Any ideas/suggestions? thanks
import pandas as pd
import matplotlib.pyplot as plt
d = {'col1':['Test 1','Test 2','Test 3','Test 4'],'col 2':[1.4,-3,1.3,5],'Col3':[900,750,878,920]}
df = pd.DataFrame(data=d)
fig = plt.figure() # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twiny() # Create another axes that shares the same y-axis as ax.
width = 0.4
df['col 2'].plot(kind='barh', color='darkblue', ax=ax, width=width, position=1,fontsize =4, figsize=(3.0, 5.0))
df['Col3'].plot(kind='barh', color='orange', ax=ax2, width=width, position=0, fontsize =4, figsize=(3.0, 5.0))
ax.set_yticklabels(df.col1)
ax.set_xlabel('Positive and Neg',color='darkblue')
ax2.set_xlabel('Positive Only',color='orange')
ax.invert_yaxis()
plt.show()

I followed the link from a question and eventually ended up at this answer : https://stackoverflow.com/a/10482477/5907969
The answer has a function to align the y-axes and I have modified the same to align x-axes as follows:
def align_xaxis(ax1, v1, ax2, v2):
"""adjust ax2 xlimit so that v2 in ax2 is aligned to v1 in ax1"""
x1, _ = ax1.transData.transform((v1, 0))
x2, _ = ax2.transData.transform((v2, 0))
inv = ax2.transData.inverted()
dx, _ = inv.transform((0, 0)) - inv.transform((x1-x2, 0))
minx, maxx = ax2.get_xlim()
ax2.set_xlim(minx+dx, maxx+dx)
And then use it within the code as follows:
import pandas as pd
import matplotlib.pyplot as plt
d = {'col1':['Test 1','Test 2','Test 3','Test 4'],'col 2' [1.4,-3,1.3,5],'Col3':[900,750,878,920]}
df = pd.DataFrame(data=d)
fig = plt.figure() # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twiny() # Create another axes that shares the same y-axis as ax.
width = 0.4
df['col 2'].plot(kind='barh', color='darkblue', ax=ax, width=width, position=1,fontsize =4, figsize=(3.0, 5.0))
df['Col3'].plot(kind='barh', color='orange', ax=ax2, width=width, position=0, fontsize =4, figsize=(3.0, 5.0))
ax.set_yticklabels(df.col1)
ax.set_xlabel('Positive and Neg',color='darkblue')
ax2.set_xlabel('Positive Only',color='orange')
align_xaxis(ax,0,ax2,0)
ax.invert_yaxis()
plt.show()
This will give you what you're looking for

Related

Combine Binned barplot with lineplot

I'd like to represent two datasets on the same plot, one as a line as one as a binned barplot. I can do each individually:
tobar = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
tobar["bins"] = pd.qcut(tobar.index, 20)
bp = sns.barplot(data=tobar, x="bins", y="value")
toline = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
lp = sns.lineplot(data=toline, x=toline.index, y="value")
But when I try to combine them, of course the x axis gets messed up:
fig, ax = plt.subplots()
ax2 = ax.twinx()
bp = sns.barplot(data=tobar, x="bins", y="value", ax=ax)
lp = sns.lineplot(data=toline, x=toline.index, y="value", ax=ax2)
bp.set(xlabel=None)
I also can't seem to get rid of the bin labels.
How can I get these two informations on the one plot?
This answer explains why it's better to plot the bars with matplotlib.axes.Axes.bar instead of sns.barplot or pandas.DataFrame.bar.
In short, the xtick locations correspond to the actual numeric value of the label, whereas the xticks for seaborn and pandas are 0 indexed, and don't correspond to the numeric value.
This answer shows how to add bar labels.
ax2 = ax.twinx() can be used for the line plot if needed
Works the same if the line plot is different data.
Tested in python 3.11, pandas 1.5.2, matplotlib 3.6.2, seaborn 0.12.1
Imports and DataFrame
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# test data
np.random.seed(2022)
df = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
# create the bins
df["bins"] = pd.qcut(df.index, 20)
# add a column for the mid point of the interval
df['mid'] = df.bins.apply(lambda row: row.mid.round().astype(int))
# pivot the dataframe to calculate the mean of each interval
pt = df.pivot_table(index='mid', values='value', aggfunc='mean').reset_index()
Plot 1
# create the figure
fig, ax = plt.subplots(figsize=(30, 7))
# add a horizontal line at y=0
ax.axhline(0, color='black')
# add the bar plot
ax.bar(data=pt, x='mid', height='value', width=4, alpha=0.5)
# set the labels on the xticks - if desired
ax.set_xticks(ticks=pt.mid, labels=pt.mid)
# add the intervals as labels on the bars - if desired
ax.bar_label(ax.containers[0], labels=df.bins.unique(), weight='bold')
# add the line plot
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')
Plot 2
fig, ax = plt.subplots(figsize=(30, 7))
ax.axhline(0, color='black')
ax.bar(data=pt, x='mid', height='value', width=4, alpha=0.5)
ax.set_xticks(ticks=pt.mid, labels=df.bins.unique(), rotation=45)
ax.bar_label(ax.containers[0], weight='bold')
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')
Plot 3
The bar width is the width of the interval
fig, ax = plt.subplots(figsize=(30, 7))
ax.axhline(0, color='black')
ax.bar(data=pt, x='mid', height='value', width=50, alpha=0.5, ec='k')
ax.set_xticks(ticks=pt.mid, labels=df.bins.unique(), rotation=45)
ax.bar_label(ax.containers[0], weight='bold')
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')

How to center shared y-labels for two horizontal bar plots?

I got a little problem and google couldn't really help me out. Here's my code:
from matplotlib import pyplot as plt
import numpy as np
job_r = list(ct_t.JobRole.unique())
att_y = ct_t[ct_t['Attrition']=='Yes']['Percentage'].values
att_n = ct_t[ct_t['Attrition']=='No']['Percentage'].values
# Sort by number of sales staff
idx = att_n.argsort()
job_r, att_y, att_n = [np.take(x, idx) for x in [job_r, att_y, att_n]]
y = np.arange(att_y.size)
fig, axes = plt.subplots(ncols=2, sharey=True, figsize=[8,8])
axes[0].barh(y, att_n, align='center', color='#43e653', zorder=10)
axes[0].set(title='NO')
axes[1].barh(y, att_y, align='center', color='#ed1c3c', zorder=10)
axes[1].set(title='YES')
axes[0].invert_xaxis()
axes[0].set(yticks=y, yticklabels=job_r)
axes[0].yaxis.tick_right()
for ax in axes.flat:
ax.margins(0.03)
ax.grid(True)
fig.tight_layout()
fig.subplots_adjust(wspace=0.7)
plt.show()
My current output:
Is there any way to center the shared y labels in the middle between those two subplots?
Can I increase the x-axis up to 1.0 ? Each time I do something like: axes[1].set_xlabels(1.0) my whole plot turns upside down.
Here is a way to achieve the desired plot.
Extending the x limits to 1 is easier if done before inverting the x-axis. The code to center and reposition the labels come from this post.
import matplotlib.pyplot as plt
import matplotlib.transforms
import numpy as np
# first create some test data compatible with the question's data
job_r = ["".join(np.repeat(letter, np.random.randint(4, 15))) for letter in 'ABCDEFG']
att_y = np.random.uniform(0.5, 0.9, len(job_r))
att_n = 1 - att_y
# Sort by number of sales staff
idx = att_n.argsort()
job_r, att_y, att_n = [np.take(x, idx) for x in [job_r, att_y, att_n]]
y = np.arange(att_y.size)
fig, axes = plt.subplots(ncols=2, sharey=True, figsize=[8, 8])
axes[0].barh(y, att_n, align='center', color='#43e653', zorder=10)
axes[0].set(title='NO')
axes[1].barh(y, att_y, align='center', color='#ed1c3c', zorder=10)
axes[1].set(title='YES')
axes[1].set_xlim(xmax=1)
axes[0].set(yticks=y, yticklabels=job_r)
axes[0].yaxis.tick_right()
axes[0].set_xlim(xmax=1)
axes[0].invert_xaxis()
for ax in axes:
ax.margins(0.03)
ax.grid(True)
fig.tight_layout()
fig.subplots_adjust(wspace=0.7)
plt.setp(axes[0].yaxis.get_majorticklabels(), ha='center')
# Create offset transform by some points in x direction
dx = 60 / 72.
dy = 0 / 72.
offset = matplotlib.transforms.ScaledTranslation(dx, dy, fig.dpi_scale_trans)
# apply offset transform to all y ticklabels.
for label in axes[0].yaxis.get_majorticklabels():
label.set_transform(label.get_transform() + offset)
plt.show()

How to make a bar chart with only a height indicator not showing the full bar?

This chart almost looks good but is probably not the way to model this in matplotlib. How to have two horizontal bars that extend to the left and right of vertical line at an x-point to show the change of the two datasets eg SDR from 0.7 to 0.25.
Currently i patch things together with '$-$' markers which make misaligned legends and i am not able to place properly. If i change the figsize the markers start misaligning from the vertical bar at their x-point, eg SDR.
How to model this kind of chart proberly?
layer0 = np.random.random(10)
fig, ax = plt.subplots(1,1, figsize=(15/2,1.5*2.5),)
ind = np.arange(10, dtype=np.float64)*1#cordx
ax.plot(ind[0::2]+0.05, layer0[0::2]-0.04, ls='None', marker='$-$', markersize=40)
ax.plot(ind[1::2]-0.15, layer0[1::2]-0.04, ls='None', marker='$-$', markersize=40)
ax.set_ylim(0,1.05)
ax.set_yticks(np.arange(0, 1.1, step=0.1))
ax.set_xticks(ind[0::2]+0.5)
ax.set_xticklabels( ('SDR', 'SSR', 'SCR', 'RCR', 'GUR') )
plt.grid(b=True)
plt.grid(color='black', which='major', axis='y', linestyle='--', lw=0.2)
plt.show()
Alternatively, you can use a horizontal bar chart barh which is more intuitive in this case. Here the key parameter is left which will shift your horizontal bar charts to left/right.
Following is a complete answer:
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(2)
layer0 = np.random.random(10)
fig, ax = plt.subplots(1,1, figsize=(15/2,1.5*2.5),)
n = 10
width = 0.5
ind = np.arange(n, dtype=np.float64)*1#cordx
ax.barh(layer0[0::2], [width]*int(n/2), height=0.01, left = ind[0::2])
ax.barh(layer0[1::2], [width]*int(n/2), height=0.01, left = ind[0::2]+width)
ax.set_ylim(0,1.05)
ax.set_yticks(np.arange(0, 1.1, step=0.1))
ax.set_xticks(ind[0::2]+0.5)
ax.set_xticklabels( ('SDR', 'SSR', 'SCR', 'RCR', 'GUR') )
plt.grid(b=True)
plt.grid(color='black', which='major', axis='y', linestyle='--', lw=0.2)
plt.show()
up until now i havent thought of bar charts with bottom offset, which seems to be ok:
layer0 = np.random.random(10)
fig, ax = plt.subplots(1,1, figsize=(15/1.3,1.5*2.5),)# sharey=True)
ind = np.arange(10, dtype=np.float64)*1#cordx
height=0.03
width=0.8
ax.bar(ind[0::2]-width/2, height, width=width, bottom=layer0[0::2]-height)
ax.bar(ind[0::2]+width/2, height, width=width, bottom=layer0[1::2]-height)
ax.set_ylim(-0.,1.05)
plt.grid(color='black', which='major', axis='x', linestyle='-', lw=0.8)

Two Y axis Bar plot: custom xticks

I am trying to add custom xticks to a relatively complicated bar graph plot and I am stuck.
I am plotting from two data frames, merged_90 and merged_15:
merged_15
Volume y_err_x Area_2D y_err_y
TripDate
2015-09-22 1663.016032 199.507503 1581.591701 163.473202
merged_90
Volume y_err_x Area_2D y_err_y
TripDate
1990-06-10 1096.530711 197.377497 1531.651913 205.197493
I want to create a bar graph with two axes (i.e. Area_2D and Volume) where the Area_2D and Volume bars are grouped based on their respective data frame. An example script would look like:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
fig = plt.figure()
ax1 = fig.add_subplot(111)
merged_90.Volume.plot(ax=ax1, color='orange', kind='bar',position=2.5, yerr=merged_90['y_err_x'] ,use_index=False , width=0.1)
merged_15.Volume.plot(ax=ax1, color='red', kind='bar',position=0.9, yerr=merged_15['y_err_x'] ,use_index=False, width=0.1)
ax2 = ax1.twinx()
merged_90.Area_2D.plot(ax=ax2,color='green', kind='bar',position=3.5, yerr=merged_90['y_err_y'],use_index=False, width=0.1)
merged_15.Area_2D.plot(ax=ax2,color='blue', kind='bar',position=0, yerr=merged_15['y_err_y'],use_index=False, width=0.1)
ax1.set_xlim(-0.5,0.2)
x = scipy.arange(1)
ax2.set_xticks(x)
ax2.set_xticklabels(['2015'])
plt.tight_layout()
plt.show()
The resulting plot is:
One would think I could change:
x = scipy.arange(1)
ax2.set_xticks(x)
ax2.set_xticklabels(['2015'])
to
x = scipy.arange(2)
ax2.set_xticks(x)
ax2.set_xticklabels(['1990','2015'])
but that results in:
I would like to see the ticks ordered in chronological order (i.e. 1990,2015)
Thanks!
Have you considered dropping the second axis and plotting them as follows:
ind = np.array([0,0.3])
width = 0.1
fig, ax = plt.subplots()
Rects1 = ax.bar(ind, [merged_90.Volume.values, merged_15.Volume.values], color=['orange', 'red'] ,width=width)
Rects2 = ax.bar(ind + width, [merged_90.Area_2D.values, merged_15.Area_2D.values], color=['green', 'blue'] ,width=width)
ax.set_xticks([.1,.4])
ax.set_xticklabels(('1990','2015'))
This produces:
I omitted the error and colors but you can easily add them. That would produce a readable graph given your test data. As you mentioned in comments you would still rather have two axes, presumably for different data with proper scales. To do this you could do:
fig = plt.figure()
ax1 = fig.add_subplot(111)
merged_90.Volume.plot(ax=ax, color='orange', kind='bar',position=2.5, use_index=False , width=0.1)
merged_15.Volume.plot(ax=ax, color='red', kind='bar',position=1.0, use_index=False, width=0.1)
ax2 = ax1.twinx()
merged_90.Area_2D.plot(ax=ax,color='green', kind='bar',position=3.5,use_index=False, width=0.1)
merged_15.Area_2D.plot(ax=ax,color='blue', kind='bar',position=0,use_index=False, width=0.1)
ax1.set_xlim([-.45, .2])
ax2.set_xlim(-.45, .2])
ax1.set_xticks([-.35, 0])
ax1.set_xticklabels([1990, 2015])
This produces:
Your problem was with resetting just one axis limit and not the other, they are created as twins but do not necessarily follow the changes made to one another.

Merge matplotlib subplots with shared x-axis

I have two graphs to where both have the same x-axis, but with different y-axis scalings.
The plot with regular axes is the data with a trend line depicting a decay while the y semi-log scaling depicts the accuracy of the fit.
fig1 = plt.figure(figsize=(15,6))
ax1 = fig1.add_subplot(111)
# Plot of the decay model
ax1.plot(FreqTime1,DecayCount1, '.', color='mediumaquamarine')
# Plot of the optimized fit
ax1.plot(x1, y1M, '-k', label='Fitting Function: $f(t) = %.3f e^{%.3f\t} \
%+.3f$' % (aR1,kR1,bR1))
ax1.set_xlabel('Time (sec)')
ax1.set_ylabel('Count')
ax1.set_title('Run 1 of Cesium-137 Decay')
# Allows me to change scales
# ax1.set_yscale('log')
ax1.legend(bbox_to_anchor=(1.0, 1.0), prop={'size':15}, fancybox=True, shadow=True)
Now, i'm trying to figure out to implement both close together like the examples supplied by this link
http://matplotlib.org/examples/pylab_examples/subplots_demo.html
In particular, this one
When looking at the code for the example, i'm a bit confused on how to implant 3 things:
1) Scaling the axes differently
2) Keeping the figure size the same for the exponential decay graph but having a the line graph have a smaller y size and same x size.
For example:
3) Keeping the label of the function to appear in just only the decay graph.
Any help would be most appreciated.
Look at the code and comments in it:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import gridspec
# Simple data to display in various forms
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig = plt.figure()
# set height ratios for subplots
gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])
# the first subplot
ax0 = plt.subplot(gs[0])
# log scale for axis Y of the first subplot
ax0.set_yscale("log")
line0, = ax0.plot(x, y, color='r')
# the second subplot
# shared axis X
ax1 = plt.subplot(gs[1], sharex = ax0)
line1, = ax1.plot(x, y, color='b', linestyle='--')
plt.setp(ax0.get_xticklabels(), visible=False)
# remove last tick label for the second subplot
yticks = ax1.yaxis.get_major_ticks()
yticks[-1].label1.set_visible(False)
# put legend on first subplot
ax0.legend((line0, line1), ('red line', 'blue line'), loc='lower left')
# remove vertical gap between subplots
plt.subplots_adjust(hspace=.0)
plt.show()
Here is my solution:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig, (ax1,ax2) = plt.subplots(nrows=2, sharex=True, subplot_kw=dict(frameon=False)) # frameon=False removes frames
plt.subplots_adjust(hspace=.0)
ax1.grid()
ax2.grid()
ax1.plot(x, y, color='r')
ax2.plot(x, y, color='b', linestyle='--')
One more option is seaborn.FacetGrid but this requires Seaborn and Pandas libraries.
Here are some adaptions to show how the code could work to add a combined legend when plotting a pandas dataframe. ax=ax0 can be used to plot on a given ax and ax0.get_legend_handles_labels() gets the information for the legend.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
dates = pd.date_range('20210101', periods=100, freq='D')
df0 = pd.DataFrame({'x': np.random.normal(0.1, 1, 100).cumsum(),
'y': np.random.normal(0.3, 1, 100).cumsum()}, index=dates)
df1 = pd.DataFrame({'z': np.random.normal(0.2, 1, 100).cumsum()}, index=dates)
fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, gridspec_kw={'height_ratios': [2, 1], 'hspace': 0})
df0.plot(ax=ax0, color=['dodgerblue', 'crimson'], legend=False)
df1.plot(ax=ax1, color='limegreen', legend=False)
# put legend on first subplot
handles0, labels0 = ax0.get_legend_handles_labels()
handles1, labels1 = ax1.get_legend_handles_labels()
ax0.legend(handles=handles0 + handles1, labels=labels0 + labels1)
# remove last tick label for the second subplot
yticks = ax1.get_yticklabels()
yticks[-1].set_visible(False)
plt.tight_layout()
plt.show()

Categories