Modifying y-axis in histogram in Pandas matplotlib - python

I have 33960 - 0's and 144 - 1's in data_train['fk_action_code_id'].
On plotting histogram, the bar of 1 is so less that it is not visible. Is there any way I can raise the bar of 1 by modifying the Y-Axis so that the bar of 1 is visible?
I tried this but it doesn't work
b=[0,145, 35000]
plt.yticks(b)
plt.hist(data_train['fk_action_code_id'], histtype='bar', rwidth=0.8)

A few suggestions: you could
1.) create two y axes, one for the zeros and the other for the ones
2.) multiply one of the bars by a numerical factor, so that they are of the same order of magnitude (you should explain this in the plot legend then)
3.) draw a logarithmic histogram with the option log=True in the plt.hist() command.
The following will produce plots for these three options:
import numpy as np
import matplotlib.pyplot as plt
zeros = np.zeros([35000])
modifier = 100
ones = np.ones([145*modifier])
arr = np.hstack((zeros, ones))
bins = np.asarray([-0.5, 0.5, 1.5])
plt.hist(arr, bins=bins, facecolor='green', alpha=0.75, log=False)
plt.xticks([0,1])
plt.title('Multiplied with a factor')
plt.savefig('multiplied.png')
plt.show()
plt.clf()
modifier = 1
ones = np.ones([145*modifier])
arr = np.hstack((zeros, ones))
plt.hist(arr, bins=bins, facecolor='green', alpha=0.75, log=True)
plt.xticks([0,1])
plt.title('Logarithmic')
plt.savefig('log.png')
plt.show()
plt.clf()
ax1 = plt.gca()
ax2 = ax1.twinx()
ax1.set_yticks([0, 35000, 40000])
ax1.set_ylim(0, 40000)
ax2.set_yticks([0, 145, 200])
ax2.set_ylim(0, 200)
ax1.hist(arr, bins=[bins[0], bins[1]], facecolor='green', alpha=0.75, log=False)#, histtype='bar')#, rwidth=1.0)
ax2.hist(arr, bins=[bins[1], bins[2]], facecolor='green', alpha=0.75, log=False)#, histtype='bar')#, rwidth=1.0)
plt.xticks([0,1])
plt.title('Two y axes')
plt.savefig('two_axes.png')
plt.show()
plt.clf()

Related

How to plot a paired histogram using seaborn

I would like to make a paired histogram like the one shown here using the seaborn distplot.
This kind of plot can also be referred to as the back-to-back histogram shown here, or a bihistogram inverted/mirrored along the x-axis as discussed here.
Here is my code:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
green = np.random.normal(20,10,1000)
blue = np.random.poisson(60,1000)
fig, ax = plt.subplots(figsize=(8,6))
sns.distplot(blue, hist=True, kde=True, hist_kws={'edgecolor':'black'}, kde_kws={'linewidth':2}, bins=10, color='blue')
sns.distplot(green, hist=True, kde=True, hist_kws={'edgecolor':'black'}, kde_kws={'linewidth':2}, bins=10, color='green')
ax.set_xticks(np.arange(-20,121,20))
ax.set_yticks(np.arange(0.0,0.07,0.01))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.show()
Here is the output:
When I use the method discussed here (plt.barh), I get the bar plot shown just below, which is not what I am looking for.
Or maybe I haven't understood the workaround well enough...
A simple/short implementation of python-seaborn-distplot similar to these kinds of plots would be perfect. I edited the figure of my first plot above to show the kind of plot I hope to achieve (though y-axis not upside down):
Any leads would be greatly appreciated.
You could use two subplots and invert the y-axis of the lower one and plot with the same bins.
df = pd.DataFrame({'a': np.random.normal(0,5,1000), 'b': np.random.normal(20,5,1000)})
fig =plt.figure(figsize=(5,5))
ax = fig.add_subplot(211)
ax2 = fig.add_subplot(212)
bins = np.arange(-20,40)
ax.hist(df['a'], bins=bins)
ax2.hist(df['b'],color='orange', bins=bins)
ax2.invert_yaxis()
edit:
improvements suggested by #mwaskom
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(5,5))
bins = np.arange(-20,40)
for ax, column, color, invert in zip(axes.ravel(), df.columns, ['teal', 'orange'], [False,True]):
ax.hist(df[column], bins=bins, color=color)
if invert:
ax.invert_yaxis()
plt.subplots_adjust(hspace=0)
Here is a possible approach using seaborn's displots.
Seaborn doesn't return the created graphical elements, but the ax can be interrogated. To make sure the ax only contains the elements you want upside down, those elements can be drawn first. Then, all the patches (the rectangular bars) and the lines (the curve for the kde) can be given their height in negative. Optionally the x-axis can be set at y == 0 using ax.spines['bottom'].set_position('zero').
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
green = np.random.normal(20, 10, 1000)
blue = np.random.poisson(60, 1000)
fig, ax = plt.subplots(figsize=(8, 6))
sns.distplot(green, hist=True, kde=True, hist_kws={'edgecolor': 'black'}, kde_kws={'linewidth': 2}, bins=10,
color='green')
for p in ax.patches: # turn the histogram upside down
p.set_height(-p.get_height())
for l in ax.lines: # turn the kde curve upside down
l.set_ydata(-l.get_ydata())
sns.distplot(blue, hist=True, kde=True, hist_kws={'edgecolor': 'black'}, kde_kws={'linewidth': 2}, bins=10,
color='blue')
ax.set_xticks(np.arange(-20, 121, 20))
ax.set_yticks(np.arange(0.0, 0.07, 0.01))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
pos_ticks = np.array([t for t in ax.get_yticks() if t > 0])
ticks = np.concatenate([-pos_ticks[::-1], [0], pos_ticks])
ax.set_yticks(ticks)
ax.set_yticklabels([f'{abs(t):.2f}' for t in ticks])
ax.spines['bottom'].set_position('zero')
plt.show()

Plotting three categories with two axes in matplotlib

I am using a combination of pandas and matplotlib to plot three values for several categories. I would like one plot to have its own axis, and the other two to share an axis.
Close, but illustrates the issue with why I need dual axes:
pd.DataFrame([[1,2,3], [500,600,700], [500, 700, 650]], columns=['foo', 'bar','baz'],
index=['a','b','c']).T.plot(kind='bar')
Instead, I would like a second axis for the a bars. My attempt:
smol = pd.DataFrame([[1,2,3], [500,600,700], [500, 700, 650]], columns=['foo', 'bar','baz'],
index=['a','b','c']).T
fig = plt.figure(figsize=(10,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax.
smol['a'].plot(kind='bar', color='red', ax=ax, width=0.3,
position=1, edgecolor='black')
smol['b'].plot(kind='bar', color='blue', ax=ax2, width=0.3,
position=0, edgecolor='black')
ax.set_ylabel('Small scale')
ax2.set_ylabel('Big scale')
plt.show()
Unfortunately, adding
smol['c'].plot(kind='bar', color='green', ax=ax2, width=0.3,
position=0, edgecolor='black')
produces:
How can I have b and c share an axis, but appear next to each other, as in the first attempt?
I've used secondary_y keyword. The code is also considerably shorter
smol = pd.DataFrame([[1,2,3], [500,600,700], [500, 700, 650]], columns=['foo', 'bar','baz'],
index=['a','b','c']).T
ax = smol.plot(kind="bar", secondary_y=['b', 'c'])
ax.set_ylabel('Small scale')
ax.right_ax.set_ylabel('Big scale')
plt.show()

Matplotlib help: Formatting a scatter plot to be square

I have a scatter plot where the axis are both limited at -100 and 100. However, when I graph the data, I always get an unappealing looking plot that is rectangular with incorrect axis labels. I'd like the plot to be a square with -100 and 100 as the last axis labels. Does anyone have advice for fixing this formatting issue?
My code is as follows:
import scipy.stats
import numpy as np
r = scipy.stats.pearsonr(x_val, y_val)
fig, ax = matplotlib.pyplot.subplots()
ax.scatter(x_val, y_val, s=75, color='green', edgecolor='black', linewidth = 2, alpha=0.4)
ax.set_axisbelow(True)
matplotlib.pyplot.axvline(0, c='#262626', linewidth=1.5, alpha=0.9)
matplotlib.pyplot.axhline(0, c='#262626', linewidth=1.5, alpha=0.9)
matplotlib.pyplot.grid(linewidth=1, color='#bfbfbf')
matplotlib.pyplot.xticks(np.arange(-100, 100, 20.0),fontsize=14, fontweight='bold',
fontname='Helvetica')
matplotlib.pyplot.yticks(np.arange(-100, 100, 20.0),fontsize=14, fontweight='bold',
fontname='Helvetica')
matplotlib.pyplot.text(-95, 85,'Pearson\'s r: %.3f'%r[0], fontsize=14, fontweight='bold',
fontname='Helvetica')
matplotlib.pyplot.show()

Merge matplotlib subplots with shared x-axis

I have two graphs to where both have the same x-axis, but with different y-axis scalings.
The plot with regular axes is the data with a trend line depicting a decay while the y semi-log scaling depicts the accuracy of the fit.
fig1 = plt.figure(figsize=(15,6))
ax1 = fig1.add_subplot(111)
# Plot of the decay model
ax1.plot(FreqTime1,DecayCount1, '.', color='mediumaquamarine')
# Plot of the optimized fit
ax1.plot(x1, y1M, '-k', label='Fitting Function: $f(t) = %.3f e^{%.3f\t} \
%+.3f$' % (aR1,kR1,bR1))
ax1.set_xlabel('Time (sec)')
ax1.set_ylabel('Count')
ax1.set_title('Run 1 of Cesium-137 Decay')
# Allows me to change scales
# ax1.set_yscale('log')
ax1.legend(bbox_to_anchor=(1.0, 1.0), prop={'size':15}, fancybox=True, shadow=True)
Now, i'm trying to figure out to implement both close together like the examples supplied by this link
http://matplotlib.org/examples/pylab_examples/subplots_demo.html
In particular, this one
When looking at the code for the example, i'm a bit confused on how to implant 3 things:
1) Scaling the axes differently
2) Keeping the figure size the same for the exponential decay graph but having a the line graph have a smaller y size and same x size.
For example:
3) Keeping the label of the function to appear in just only the decay graph.
Any help would be most appreciated.
Look at the code and comments in it:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import gridspec
# Simple data to display in various forms
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig = plt.figure()
# set height ratios for subplots
gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])
# the first subplot
ax0 = plt.subplot(gs[0])
# log scale for axis Y of the first subplot
ax0.set_yscale("log")
line0, = ax0.plot(x, y, color='r')
# the second subplot
# shared axis X
ax1 = plt.subplot(gs[1], sharex = ax0)
line1, = ax1.plot(x, y, color='b', linestyle='--')
plt.setp(ax0.get_xticklabels(), visible=False)
# remove last tick label for the second subplot
yticks = ax1.yaxis.get_major_ticks()
yticks[-1].label1.set_visible(False)
# put legend on first subplot
ax0.legend((line0, line1), ('red line', 'blue line'), loc='lower left')
# remove vertical gap between subplots
plt.subplots_adjust(hspace=.0)
plt.show()
Here is my solution:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig, (ax1,ax2) = plt.subplots(nrows=2, sharex=True, subplot_kw=dict(frameon=False)) # frameon=False removes frames
plt.subplots_adjust(hspace=.0)
ax1.grid()
ax2.grid()
ax1.plot(x, y, color='r')
ax2.plot(x, y, color='b', linestyle='--')
One more option is seaborn.FacetGrid but this requires Seaborn and Pandas libraries.
Here are some adaptions to show how the code could work to add a combined legend when plotting a pandas dataframe. ax=ax0 can be used to plot on a given ax and ax0.get_legend_handles_labels() gets the information for the legend.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
dates = pd.date_range('20210101', periods=100, freq='D')
df0 = pd.DataFrame({'x': np.random.normal(0.1, 1, 100).cumsum(),
'y': np.random.normal(0.3, 1, 100).cumsum()}, index=dates)
df1 = pd.DataFrame({'z': np.random.normal(0.2, 1, 100).cumsum()}, index=dates)
fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, gridspec_kw={'height_ratios': [2, 1], 'hspace': 0})
df0.plot(ax=ax0, color=['dodgerblue', 'crimson'], legend=False)
df1.plot(ax=ax1, color='limegreen', legend=False)
# put legend on first subplot
handles0, labels0 = ax0.get_legend_handles_labels()
handles1, labels1 = ax1.get_legend_handles_labels()
ax0.legend(handles=handles0 + handles1, labels=labels0 + labels1)
# remove last tick label for the second subplot
yticks = ax1.get_yticklabels()
yticks[-1].set_visible(False)
plt.tight_layout()
plt.show()

How to plot a superimposed bar chart using matplotlib in python?

I want to plot a bar chart or a histogram using matplotlib. I don't want a stacked bar plot, but a superimposed barplot of two lists of data, for instance I have the following two lists of data with me:
Some code to begin with :
import matplotlib.pyplot as plt
from numpy.random import normal, uniform
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,1419.34,
1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,1138.70,
1104.12,1012.95,1000.36]
plt.hist(highPower, bins=10, histtype='stepfilled', normed=True,
color='b', label='Max Power in mW')
plt.hist(lowPower, bins=10, histtype='stepfilled', normed=True,
color='r', alpha=0.5, label='Min Power in mW')
I want to plot these two lists against the number of values in the two lists such that I am able to see the variation per reading.
You can produce a superimposed bar chart using plt.bar() with the alpha keyword as shown below.
The alpha controls the transparency of the bar.
N.B. when you have two overlapping bars, one with an alpha < 1, you will get a mixture of colours. As such the bar will appear purple even though the legend shows it as a light red. To alleviate this I have modified the width of one of the bars, this way even if your powers should change you will still be able to see both bars.
plt.xticks can be used to set the location and format of the x-ticks in your graph.
import matplotlib.pyplot as plt
import numpy as np
width = 0.8
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,
1419.34,1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,
1138.70,1104.12,1012.95,1000.36]
indices = np.arange(len(highPower))
plt.bar(indices, highPower, width=width,
color='b', label='Max Power in mW')
plt.bar([i+0.25*width for i in indices], lowPower,
width=0.5*width, color='r', alpha=0.5, label='Min Power in mW')
plt.xticks(indices+width/2.,
['T{}'.format(i) for i in range(len(highPower))] )
plt.legend()
plt.show()
Building on #Ffisegydd's answer, if your data is in a Pandas DataFrame, this should work nicely:
def overlapped_bar(df, show=False, width=0.9, alpha=.5,
title='', xlabel='', ylabel='', **plot_kwargs):
"""Like a stacked bar chart except bars on top of each other with transparency"""
xlabel = xlabel or df.index.name
N = len(df)
M = len(df.columns)
indices = np.arange(N)
colors = ['steelblue', 'firebrick', 'darksage', 'goldenrod', 'gray'] * int(M / 5. + 1)
for i, label, color in zip(range(M), df.columns, colors):
kwargs = plot_kwargs
kwargs.update({'color': color, 'label': label})
plt.bar(indices, df[label], width=width, alpha=alpha if i else 1, **kwargs)
plt.xticks(indices + .5 * width,
['{}'.format(idx) for idx in df.index.values])
plt.legend()
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
if show:
plt.show()
return plt.gcf()
And then in a python command line:
low = [1000.95, 1233.37, 1198.97, 1198.01, 1214.29, 1130.86, 1138.70, 1104.12, 1012.95, 1000.36]
high = [1184.53, 1523.48, 1521.05, 1517.88, 1519.88, 1414.98, 1419.34, 1415.13, 1182.70, 1165.17]
df = pd.DataFrame(np.matrix([high, low]).T, columns=['High', 'Low'],
index=pd.Index(['T%s' %i for i in range(len(high))],
name='Index'))
overlapped_bar(df, show=False)
It is actually simpler than the answers all over the internet make it appear.
a = range(1,10)
b = range(4,13)
ind = np.arange(len(a))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.bar(x=ind, height=a, width=0.35,align='center')
ax.bar(x=ind, height=b, width=0.35/3, align='center')
plt.xticks(ind, a)
plt.tight_layout()
plt.show()

Categories