Plot dual axis graph using DataFrame - python

I have a DataFrame that looks like this:
state runtime pixels segments
0 Texas 0.079277 1756374 12960
1 California 0.045553 1221211 5129
2 Rhode Island 0.002466 8134 1247
3 Washington 0.016046 339786 6854
4 Alabama 0.009114 214936 1930
5 District of Columbia 0.000799 506 218
6 Ohio 0.007617 192800 2949
I am trying to plot this DataFrame with a dual y-axis along a shared x-axis (runtime)
I have done this with the below code:
import pandas as pd
import matplotlib.pyplot as plt
from pylab import figure, show, legend, ylabel
data = pd.read_excel('runtimes.xlsx')
## create the general figure
fig1 = figure()
ax1 = fig1.add_subplot(111)
ax1.plot(data[['runtime', 'pixels']].T)
ax1.set_ylabel('Pixels')
ax1.set_xlabel('Runtime (s)')
ax2 = ax1.twinx()
ax2.plot(data[['runtime', 'segments']].T)
ax2.set_ylabel('Segments', color='r')
for tl in ax2.get_yticklabels():
tl.set_color('r')
Which gives me this graph:
There are two problems I am trying to fix:
(1) This plots each row twice when I only want it to plot once - how can I fix this?
(2) How can I add a legend so you can tell which line indicates its proper state?

I find that it is usually easier to be explicit about the columns, instead of allowing pandas to do everything automatically, for these kinds of cases. E.g.
ax1.scatter(data['runtime'], data['pixels'])
and
ax2.scatter(data['runtime'], data['segments'])
For a complete example demonstrating this:
import pandas as pd
import matplotlib.pyplot as plt
from pylab import figure, show, legend, ylabel
data = pd.DataFrame({'runtime': [0.079277, 0.045553, 0.002466, 0.016046, 0.009114,
0.000799, 0.007617],
'pixels': [1756374, 1221211, 8134, 339786, 214936, 506, 192800],
'segments':[12960, 5129, 1247, 6854, 1930, 218, 2949]})
## create the general figure
fig1 = figure()
ax1 = fig1.add_subplot(111)
ax1.scatter(data['runtime'], data['pixels'], label="Pixels", marker='.', color='k')
ax1.set_ylabel('Pixels')
ax1.set_xlabel('Runtime (s)')
ax2 = ax1.twinx()
ax2.scatter(data['runtime'], data['segments'], label="Segments", marker='.', color='r')
ax2.set_ylabel('Segments', color='r')
for tl in ax2.get_yticklabels():
tl.set_color('r')
fig1.legend(bbox_to_anchor=(0.225,0.845))
plt.show()
You may also note the legend, you can change the location as you want by changing the bbox_to_anchor tuple, docs
Edit
If you need to color based on state then you can do something like this
import pandas as pd
import matplotlib.pyplot as plt
from pylab import figure, show, legend, ylabel
import matplotlib.lines as mlines
data = pd.DataFrame({'state': ["Texas", "California", "Rhode Island", "Washington",
"Alabama", "District of Columbia", "Ohio"],
'runtime': [0.079277, 0.045553, 0.002466, 0.016046,
0.009114, 0.000799, 0.007617],
'pixels': [1756374, 1221211, 8134, 339786, 214936, 506, 192800],
'segments':[12960, 5129, 1247, 6854, 1930, 218, 2949]})
## create the general figure
fig1 = figure()
ax1 = fig1.add_subplot(111)
ax2 = ax1.twinx()
for ii in range(len(data['state'])):
ax1.scatter(data['runtime'][ii], data['pixels'][ii],
label=data['state'][ii], marker='.')
ax2.scatter(data['runtime'][ii], data['segments'][ii], marker='+')
ax1.set_ylabel('Pixels')
ax1.set_xlabel('Runtime (s)')
legend = fig1.legend(bbox_to_anchor=(0.3,0.845))
m1 = mlines.Line2D([], [], color='black', linewidth = 0, marker='.', label='Pixels')
m2 = mlines.Line2D([], [], color='black', linewidth = 0, marker='+', label='Segments')
plt.legend(handles=[m1,m2], loc='lower right')
ax2.set_ylabel('Segments', color='r')
for tl in ax2.get_yticklabels():
tl.set_color('r')
plt.show()

Related

Python Seaborn heatmap with custom order on both axes and values from a frequency table (data included)

I have this data in a frequency table. I just want to be able to create a heatmap with Fac1 on Y axis, Fac2 on X axis and the frequency values as heatmap. The order of the Factors in Fac1 and Fac2 must be maintained in the same sequence (after removing duplicates from both Fac1 and Fac2 columns). I haven't been able to get this working after so many tries but I've managed to get the data in order and the simplest representation. I'd greatly appreciate any help in this.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
url = "https://raw.githubusercontent.com/rroyss/stack/main/dfso.csv"
df = pd.read_csv(url)
plt.subplots(figsize=(15,30))
plt.tick_params(axis='both', which='major', labelsize=10, labelbottom = False, bottom=False, top = True, labeltop=True)
sns.heatmap(df, cmap="Blues", linewidth=1, xticklabels=True, yticklabels=True)
You have to convert your dataframe if you want to sue heatmap:
df2 = df.drop_duplicates().pivot_table(index='Fac1', columns='Fac2', values='Frequency Fac1-Fac2 pair', sort=False)
plt.subplots(figsize=(15, 30))
plt.tick_params(axis='both', which='major', labelsize=10, labelbottom=False, bottom=False, top=True, labeltop=True)
sns.heatmap(df2, cmap="Blues", linewidth=1, xticklabels=True, yticklabels=True)
This is the result (zoomed on the first rows and columns):
You first need to reorganize the dataframe such that Fac1 becomes the index, Fac2 the columns, and the values are aggregated from the third column. E.g. df_pivoted = df.pivot_table(index='Fac1', columns='Fac2', values='Frequency Fac1-Fac2 pair').
The heatmap will use the order provided by the columns and index as created by pivot_table. Keeping the original order is a bit tricky, but can be achieved by pd.Categorical (which forces an order) combined by pd.unique() (which keeps the original order, unlike np.unique).
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
url = "https://raw.githubusercontent.com/rroyss/stack/main/dfso.csv"
df = pd.read_csv(url)
df['Fac1'] = pd.Categorical(df['Fac1'], categories=pd.unique(df['Fac1']))
df['Fac2'] = pd.Categorical(df['Fac2'], categories=pd.unique(df['Fac2']))
df_pivoted = df.pivot_table(index='Fac1', columns='Fac2', values='Frequency Fac1-Fac2 pair')
fig, ax = plt.subplots(figsize=(20, 30))
sns.heatmap(data=df_pivoted, cmap='Blues', xticklabels=True, yticklabels=True, ax=ax)
ax.tick_params(axis='both', which='major', labelsize=10, labeltop=True, top=True, labelbottom=False, bottom=False)
ax.tick_params(axis='x', labelrotation=90)
plt.tight_layout()
plt.show()
If you are aiming for a 2d histogram or kde plot where the last column is intended as weights:
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
url = "https://raw.githubusercontent.com/rroyss/stack/main/dfso.csv"
df = pd.read_csv(url)
df['Fac1'] = [int(f[5:]) for f in df['Fac1']]
df['Fac2'] = [int(f[6:]) for f in df['Fac2']]
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(20, 10))
sns.histplot(data=df, x='Fac1', y='Fac2', weights='Frequency Fac1-Fac2 pair', bins=20, color='blue', ax=ax1)
sns.kdeplot(data=df, x='Fac1', y='Fac2', weights='Frequency Fac1-Fac2 pair', color='blue', ax=ax2)
for ax in (ax1, ax2):
ax.tick_params(axis='both', which='major', labelsize=10)
plt.tight_layout()
plt.show()

How to set space between plot and colormap table

I am using secondary y-axis and cmap color but when I plot together the color bar cross to my plot
here is my code
fig,ax1=plt.subplots()
ax1 = df_Combine.plot.scatter('Parameter2', 'NPV (MM €)', marker='s', s=500, ylim=(-10,60), c='Lifetime1 (a)', colormap='jet_r', vmin=0, vmax=25, ax=ax1)
graph.axhline(0, color='k')
plt.xticks(rotation=90)
ax2 = ax1.twinx()
ax2.plot(df_Combine_min_select1["CumEnergy1 (kWH)"])
plt.show()
and here is my plotting
anyone can help how to solve this issue?
Thank you
When you let pandas automatically create a colorbar, you don't have positioning options. Therefore, you can create the colorbar in a separate step and provide the pad= parameter to set a wider gap. Default, pad is 0.05, meaning 5% of the width of the subplot.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
df_Combine = pd.DataFrame({'Parameter2': np.random.rand(10) * 10,
'NPV (MM €)': np.random.rand(10),
'Lifetime1 (a)': np.random.rand(10) * 25,
})
ax1 = df_Combine.plot.scatter('Parameter2', 'NPV (MM €)', marker='s', s=500, ylim=(-10, 60), c='Lifetime1 (a)',
colormap='jet_r', vmin=0, vmax=25, ax=ax1, colorbar=False)
plt.colorbar(ax1.collections[0], ax=ax1, pad=0.1)
ax2 = ax1.twinx()
ax2.plot(np.random.rand(10))
plt.show()

How to plot a paired histogram using seaborn

I would like to make a paired histogram like the one shown here using the seaborn distplot.
This kind of plot can also be referred to as the back-to-back histogram shown here, or a bihistogram inverted/mirrored along the x-axis as discussed here.
Here is my code:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
green = np.random.normal(20,10,1000)
blue = np.random.poisson(60,1000)
fig, ax = plt.subplots(figsize=(8,6))
sns.distplot(blue, hist=True, kde=True, hist_kws={'edgecolor':'black'}, kde_kws={'linewidth':2}, bins=10, color='blue')
sns.distplot(green, hist=True, kde=True, hist_kws={'edgecolor':'black'}, kde_kws={'linewidth':2}, bins=10, color='green')
ax.set_xticks(np.arange(-20,121,20))
ax.set_yticks(np.arange(0.0,0.07,0.01))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.show()
Here is the output:
When I use the method discussed here (plt.barh), I get the bar plot shown just below, which is not what I am looking for.
Or maybe I haven't understood the workaround well enough...
A simple/short implementation of python-seaborn-distplot similar to these kinds of plots would be perfect. I edited the figure of my first plot above to show the kind of plot I hope to achieve (though y-axis not upside down):
Any leads would be greatly appreciated.
You could use two subplots and invert the y-axis of the lower one and plot with the same bins.
df = pd.DataFrame({'a': np.random.normal(0,5,1000), 'b': np.random.normal(20,5,1000)})
fig =plt.figure(figsize=(5,5))
ax = fig.add_subplot(211)
ax2 = fig.add_subplot(212)
bins = np.arange(-20,40)
ax.hist(df['a'], bins=bins)
ax2.hist(df['b'],color='orange', bins=bins)
ax2.invert_yaxis()
edit:
improvements suggested by #mwaskom
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(5,5))
bins = np.arange(-20,40)
for ax, column, color, invert in zip(axes.ravel(), df.columns, ['teal', 'orange'], [False,True]):
ax.hist(df[column], bins=bins, color=color)
if invert:
ax.invert_yaxis()
plt.subplots_adjust(hspace=0)
Here is a possible approach using seaborn's displots.
Seaborn doesn't return the created graphical elements, but the ax can be interrogated. To make sure the ax only contains the elements you want upside down, those elements can be drawn first. Then, all the patches (the rectangular bars) and the lines (the curve for the kde) can be given their height in negative. Optionally the x-axis can be set at y == 0 using ax.spines['bottom'].set_position('zero').
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
green = np.random.normal(20, 10, 1000)
blue = np.random.poisson(60, 1000)
fig, ax = plt.subplots(figsize=(8, 6))
sns.distplot(green, hist=True, kde=True, hist_kws={'edgecolor': 'black'}, kde_kws={'linewidth': 2}, bins=10,
color='green')
for p in ax.patches: # turn the histogram upside down
p.set_height(-p.get_height())
for l in ax.lines: # turn the kde curve upside down
l.set_ydata(-l.get_ydata())
sns.distplot(blue, hist=True, kde=True, hist_kws={'edgecolor': 'black'}, kde_kws={'linewidth': 2}, bins=10,
color='blue')
ax.set_xticks(np.arange(-20, 121, 20))
ax.set_yticks(np.arange(0.0, 0.07, 0.01))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
pos_ticks = np.array([t for t in ax.get_yticks() if t > 0])
ticks = np.concatenate([-pos_ticks[::-1], [0], pos_ticks])
ax.set_yticks(ticks)
ax.set_yticklabels([f'{abs(t):.2f}' for t in ticks])
ax.spines['bottom'].set_position('zero')
plt.show()

Unable to generate legend using python / matlibplot for 4 lines all labelled

Want labels for Bollinger Bands (R) ('upper band', 'rolling mean', 'lower band') to show up in legend. But legend just applies the same label to each line with the pandas label for the first (only) column, 'IBM'.
# Plot price values, rolling mean and Bollinger Bands (R)
ax = prices['IBM'].plot(title="Bollinger Bands")
rm_sym.plot(label='Rolling mean', ax=ax)
upper_band.plot(label='upper band', c='r', ax=ax)
lower_band.plot(label='lower band', c='r', ax=ax)
#
# Add axis labels and legend
ax.set_xlabel("Date")
ax.set_ylabel("Adjusted Closing Price")
ax.legend(loc='upper left')
plt.show()
I know this code may represent a fundamental lack of understanding of how matlibplot works so explanations are particularly welcome.
The problem is most probably that whatever upper_band and lower_band are, they are not labeled.
One option is to label them by putting them as column to a dataframe. This will allow to plot the dataframe column directly.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
y =np.random.rand(4)
yupper = y+0.2
ylower = y-0.2
df = pd.DataFrame({"price" : y, "upper": yupper, "lower": ylower})
fig, ax = plt.subplots()
df["price"].plot(label='Rolling mean', ax=ax)
df["upper"].plot(label='upper band', c='r', ax=ax)
df["lower"].plot(label='lower band', c='r', ax=ax)
ax.legend(loc='upper left')
plt.show()
Otherwise you can also plot the data directly.
import matplotlib.pyplot as plt
import numpy as np
y =np.random.rand(4)
yupper = y+0.2
ylower = y-0.2
fig, ax = plt.subplots()
ax.plot(y, label='Rolling mean')
ax.plot(yupper, label='upper band', c='r')
ax.plot(ylower, label='lower band', c='r')
ax.legend(loc='upper left')
plt.show()
In both cases, you'll get a legend with labels. If that isn't enough, I recommend reading the Matplotlib Legend Guide which also tells you how to manually add labels to legends.

seaborn or matplotlib grid is covering the lines in the plot

Here is my (incomplete, I have note added the data itself) code, which produces a somewhat confusing plot, where one line is covered by the grid but the other not.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pylab
sns.set_context("poster",font_scale=fs)
sns.set_style("darkgrid") # No grid lines
# sns.set_style({'legend.frameon': 'True'})
sns.set_style({'xtick.major.size':'0.0'})
c1,c2 = sns.color_palette("hls",2)#sns.color_palette("colorblind", 2)
a = sns.color_palette("BuGn_r")
# runs_plot = pd.DataFrame(runs.values+8.5)
# Plot just first state trajectory
fig, ax1 = plt.subplots(1,sharey=True, sharex=True, figsize=(30,8))
ax1.plot((ground.values+6),label='Ground Truth',color=c1)
ax1.set_xlabel('Time [$s$]')
ax1.set_ylim(0,10)
ax1.set_ylabel('State [$\#$]')
for tl in ax1.get_yticklabels():
tl.set_color(c1)
ax2 = ax1.twinx()
ax2.plot(0.4*signal_syn.values+1,color=c2,label='Emission Signal')
ax2.set_ylabel('Observations')
ax2.set_ylim(0,10)
# ax2.set_axisbelow(True)
for tl in ax2.get_yticklabels():
tl.set_color(c2)
# ask matplotlib for the plotted objects and their labels
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(lines + lines2, labels + labels2,ncol=5,loc='upper center', bbox_to_anchor=(0.5, -0.2))
plt.show()
which produces
now and you can probably see, that for the "Ground Truth" the line is covered by the 'darkgrid' option of the seaborn (which produces a white grid as seen above). Now for some reason the grid is not above the emission signal but only the ground truth.
Any ideas for why this might be?
So this is what I ended up doing, it is probably more of a hack than an actual solution, but it works. I just moved the plotting elements so that they're all plotted above the grid.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pylab
sns.set_context("poster",font_scale=fs)
sns.set_style("darkgrid") # No grid lines
# sns.set_style({'legend.frameon': 'True'})
sns.set_style({'xtick.major.size':'0.0'})
c1,c2 = sns.color_palette("hls",2)#sns.color_palette("colorblind", 2)
a = sns.color_palette("BuGn_r")
# runs_plot = pd.DataFrame(runs.values+8.5)
# Plot just first state trajectory
fig, ax1 = plt.subplots(1,sharey=True, sharex=True, figsize=(30,8))
ax1.set_xlabel('Time [$s$]')
ax1.set_ylim(0,10)
ax1.set_ylabel('State [$\#$]')
for tl in ax1.get_yticklabels():
tl.set_color(c1)
ax2 = ax1.twinx()
ax2.plot((ground.values+6),label='Ground Truth',color=c1)
ax2.plot(0.4*signal_syn.values+1,color=c2,label='Emission Signal')
ax2.set_ylabel('Observations')
ax2.set_ylim(0,10)
# ax2.set_axisbelow(True)
for tl in ax2.get_yticklabels():
tl.set_color(c2)
# ask matplotlib for the plotted objects and their labels
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(lines + lines2, labels + labels2,ncol=5,loc='upper center', bbox_to_anchor=(0.5, -0.2))
plt.show()
Seems like the answer is in this question:
Matplotlib: draw grid lines behind other graph elements
And it is basically: Axis.set_axisbelow(True)

Categories