Annoying white space in bar chart (matplotlib, Python) - python

It's probably a trivial question, but I am trying to plot a bar chart with matplotlib and with rotated text on the x axis.
The code I'm using is shown below:
fig = plt.figure()
x_labels_list = []
for i in range(0, pow(2, N)):
x_labels_list.append(str(f(i))) # The function f() converts i to a binary string
ax = plt.subplot(111)
width = 1.0
bins = map(lambda x: x-width, range(1,pow(2,N)+1))
ax.bar(bins, my_data, width=width)
ax.set_xticks(map(lambda x: x-width/2, range(1,pow(2,N)+1)))
ax.set_xticklabels(x_labels_list, rotation=90, rotation_mode="anchor", ha="right")
It works perfectly, but I obtain an annoying white space on the right of the x axis, as shown by the red ellipse in the following picture:
Do you know how I can remove it? Thanks in advance!

Try calling plt.xlim() with the number of bins, e.g.
plt.xlim([0,bins.size])
Here is an example:
#make some data
N = 22
data = np.random.randint(1,10,N)
bin = np.arange(N)
width = 1
#plot it
ax = plt.subplot(111)
ax.bar(bin, data, width, color='r')
plt.show()
No plt.xlim() output:
Now plot it with plt.xlim using the number of bins to define the size:
#plot it
ax = plt.subplot(111)
ax.bar(bin, data, width, color='r')
plt.xlim([0,bin.size])
plt.show()
Results it:
There may be a better way, but this should work for you.

Related

scatter plot color bar does not look right

I have written my code to create a scatter plot with a color bar on the right. But the color bar does not look right, in the sense that the color is too light to be mapped to the actual color used in the plot. I am not sure what is missing or wrong here. But I am hoping to get something similar to what's shown here: https://medium.com/#juliansteam/what-bert-topic-modelling-reveal-about-the-2021-unrest-in-south-africa-d0d15629a9b4 (about in the middle of the page)
df = .... # data loading
df["topic"] = topics
# Plot parameters
top_n = topn
fontsize = 15
# some data preparation
to_plot = df.copy()
to_plot[df.topic >= top_n] = -1
outliers = to_plot.loc[to_plot.topic == -1]
non_outliers = to_plot.loc[to_plot.topic != -1]
#the actual plot
fig, ax = plt.subplots(figsize=(15, 15))
scatter_outliers = ax.scatter(outliers['x'], outliers['y'], color="#E0E0E0", s=1, alpha=.3)
scatter = ax.scatter(non_outliers['x'], non_outliers['y'], c=non_outliers['topic'], s=1, alpha=.3, cmap='hsv_r')
ax.text(0.99, 0.01, f"BERTopic - Top {top_n} topics", transform=ax.transAxes, horizontalalignment="right", color="black")
plt.xticks([], [])
plt.yticks([], [])
plt.colorbar(scatter)
plt.savefig(outfile+"_1.png", format='png', dpi=300)
plt.clf()
plt.close()
As you can see, an example plot looks like this. The color bar is created, but compared to that shown in the link above, the color is very light and does not seem to map to those on the scatter plot. Any suggestions?
The colorbar uses the given alpha=.3. In the scatterplot, many dots with the same color are superimposed, causing them to look brighter than a single dot.
One way to tackle this, is to create a ScalarMappable object to be used by the colorbar, taking the colormap and the norm of the scatter plot (but not its alpha). Note that simply changing the alpha of the scatter object (scatter.set_alpha(1)) would also change the plot itself.
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
import numpy as np
x = np.random.normal(np.repeat(np.random.uniform(0, 20, 10), 1000))
y = np.random.normal(np.repeat(np.random.uniform(0, 10, 10), 1000))
c = np.repeat(np.arange(10), 1000)
scatter = plt.scatter(x, y, c=c, cmap='hsv_r', alpha=.3, s=3)
plt.colorbar(ScalarMappable(cmap=scatter.get_cmap(), norm=scatter.norm))
plt.tight_layout()
plt.show()

Two subplots coming out too long (length)

I'm attempting to plot two bar charts using matplotlib.pyplot.subplots. I've created subplots within a function, but when I output the subplots they are too long in height and not long enough in width.
Here's the function that I wrote:
def corr_bar(data1, data2, method='pearson'):
# Basic configuration.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(7, 7))
ax1, ax2 = axes
corr_matrix1 = data1.corr(method=method)
corr_matrix2 = data2.corr(method=method)
cmap = cm.get_cmap('coolwarm')
major_ticks = np.arange(0, 1.1, 0.1)
minor_ticks = np.arange(0, 1.1, 0.05)
# Values for plotting.
x1 = corr_matrix1['price'].sort_values(ascending=False).index
x2 = corr_matrix2['price'].sort_values(ascending=False).index
values1 = corr_matrix1['price'].sort_values(ascending=False).values
values2 = corr_matrix2['price'].sort_values(ascending=False).values
im1 = ax1.bar(x1, values1, color=cmap(values1))
im2 = ax2.bar(x2, values2, color=cmap(values2))
# Formatting for plot 1.
ax1.set_yticks(major_ticks)
ax1.set_yticks(minor_ticks, minor=True)
plt.setp(ax1.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
ax1.grid(which='both')
ax1.grid(which='minor', alpha=0.4)
ax1.grid(which='major', alpha=0.7)
ax1.xaxis.grid(False)
# Formatting for plot 2.
ax2.set_yticks(major_ticks)
ax2.set_yticks(minor_ticks, minor=True)
plt.setp(ax2.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
ax2.grid(which='both')
ax2.grid(which='minor', alpha=0.4)
ax2.grid(which='major', alpha=0.7)
ax2.xaxis.grid(False)
fig.tight_layout()
plt.show()
This function (when run with two Pandas DataFrames) outputs an image like the following:
I purposely captured the blank right side of the image as well in an attempt to better depict my predicament. What I want is for the bar charts to be appropriately sized in height and width as to take up the entire space, rather than be elongated and pushed to the left.
I've tried to use the ax.set(aspect='equal') method but it "scrunches up" the bar chart. Would anybody happen to know what I could do to solve this issue?
Thank you.
When you define figsize=(7,7) you are setting the size of the entire figure and not the subplots. So your entire figure must be a square in this case. You should change it to figsize=(14,7) or use a number larger than 14 to get a little bit of extra space.

How to plot a superimposed bar chart using matplotlib in python?

I want to plot a bar chart or a histogram using matplotlib. I don't want a stacked bar plot, but a superimposed barplot of two lists of data, for instance I have the following two lists of data with me:
Some code to begin with :
import matplotlib.pyplot as plt
from numpy.random import normal, uniform
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,1419.34,
1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,1138.70,
1104.12,1012.95,1000.36]
plt.hist(highPower, bins=10, histtype='stepfilled', normed=True,
color='b', label='Max Power in mW')
plt.hist(lowPower, bins=10, histtype='stepfilled', normed=True,
color='r', alpha=0.5, label='Min Power in mW')
I want to plot these two lists against the number of values in the two lists such that I am able to see the variation per reading.
You can produce a superimposed bar chart using plt.bar() with the alpha keyword as shown below.
The alpha controls the transparency of the bar.
N.B. when you have two overlapping bars, one with an alpha < 1, you will get a mixture of colours. As such the bar will appear purple even though the legend shows it as a light red. To alleviate this I have modified the width of one of the bars, this way even if your powers should change you will still be able to see both bars.
plt.xticks can be used to set the location and format of the x-ticks in your graph.
import matplotlib.pyplot as plt
import numpy as np
width = 0.8
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,
1419.34,1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,
1138.70,1104.12,1012.95,1000.36]
indices = np.arange(len(highPower))
plt.bar(indices, highPower, width=width,
color='b', label='Max Power in mW')
plt.bar([i+0.25*width for i in indices], lowPower,
width=0.5*width, color='r', alpha=0.5, label='Min Power in mW')
plt.xticks(indices+width/2.,
['T{}'.format(i) for i in range(len(highPower))] )
plt.legend()
plt.show()
Building on #Ffisegydd's answer, if your data is in a Pandas DataFrame, this should work nicely:
def overlapped_bar(df, show=False, width=0.9, alpha=.5,
title='', xlabel='', ylabel='', **plot_kwargs):
"""Like a stacked bar chart except bars on top of each other with transparency"""
xlabel = xlabel or df.index.name
N = len(df)
M = len(df.columns)
indices = np.arange(N)
colors = ['steelblue', 'firebrick', 'darksage', 'goldenrod', 'gray'] * int(M / 5. + 1)
for i, label, color in zip(range(M), df.columns, colors):
kwargs = plot_kwargs
kwargs.update({'color': color, 'label': label})
plt.bar(indices, df[label], width=width, alpha=alpha if i else 1, **kwargs)
plt.xticks(indices + .5 * width,
['{}'.format(idx) for idx in df.index.values])
plt.legend()
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
if show:
plt.show()
return plt.gcf()
And then in a python command line:
low = [1000.95, 1233.37, 1198.97, 1198.01, 1214.29, 1130.86, 1138.70, 1104.12, 1012.95, 1000.36]
high = [1184.53, 1523.48, 1521.05, 1517.88, 1519.88, 1414.98, 1419.34, 1415.13, 1182.70, 1165.17]
df = pd.DataFrame(np.matrix([high, low]).T, columns=['High', 'Low'],
index=pd.Index(['T%s' %i for i in range(len(high))],
name='Index'))
overlapped_bar(df, show=False)
It is actually simpler than the answers all over the internet make it appear.
a = range(1,10)
b = range(4,13)
ind = np.arange(len(a))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.bar(x=ind, height=a, width=0.35,align='center')
ax.bar(x=ind, height=b, width=0.35/3, align='center')
plt.xticks(ind, a)
plt.tight_layout()
plt.show()

Matplotlib: how to set ticks of twinned axis in log plot

In my plot, a secondary x axis is used to display the value of another variable for some data. Now, the original axis is log scaled. Unfortunaltely, the twinned axis puts the ticks (and the labels) referring to the linear scale of the original axis and not as intended to the log scale. How can this be overcome?
Here the code example that should put the ticks of the twinned axis in the same (absolute axes) position as the ones for the original axis:
def conv(x):
"""some conversion function"""
# ...
return x2
ax = plt.subplot(1,1,1)
ax.set_xscale('log')
# get the location of the ticks of ax
axlocs,axlabels = plt.xticks()
# twin axis and set limits as in ax
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())
#Set the ticks, should be set referring to the log scale of ax, but are set referring to the linear scale
ax2.set_xticks(axlocs)
# put the converted labels
ax2.set_xticklabels(map(conv,axlocs))
An alternative way would be (the ticks are then not set in the same position, but that doesn't matter):
from matplotlib.ticker import FuncFormatter
ax = plt.subplot(1,1,1)
ax.set_xscale('log')
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())
ax2.xaxis.set_major_formatter(FuncFormatter(lambda x,pos:conv(x)))
Both approaches work well as long as no log scale is used.
Perhaps there exists an easy fix. Is there something I missed in the documentation?
As a workaround, I tried to obtain the ax.transAxes coordinates of the ticks of ax and put the ticks at the very same position in ax2. But there does not exist something like
ax2.set_xticks(axlocs,transform=ax2.transAxes)
TypeError: set_xticks() got an unexpected keyword argument 'transform'
This has been asked a while ago, but I stumbled over it with the same question.
I eventually managed to solve the problem by introducing a logscaled (semilogx) transparent (alpha=0) dummy plot.
Example:
import numpy as np
import matplotlib.pyplot as plt
def conversion_func(x): # some arbitrary transformation function
return 2 * x**0.5 # from x to z
x = np.logspace(0, 5, 100)
y = np.sin(np.log(x))
fig = plt.figure()
ax = plt.gca()
ax.semilogx(x, y, 'k')
ax.set_xlim(x[0], x[-1]) # this is important in order that limits of both axes match
ax.set_ylabel("$y$")
ax.set_xlabel("$x$", color='C0')
ax.tick_params(axis='x', which='both', colors='C0')
ax.axvline(100, c='C0', lw=3)
ticks_x = np.logspace(0, 5, 5 + 1) # must span limits of first axis with clever spacing
ticks_z = conversion_func(ticks_x)
ax2 = ax.twiny() # get the twin axis
ax2.semilogx(ticks_z, np.ones_like(ticks_z), alpha=0) # transparent dummy plot
ax2.set_xlim(ticks_z[0], ticks_z[-1])
ax2.set_xlabel("$z \equiv f(x)$", color='C1')
ax2.xaxis.label.set_color('C1')
ax2.tick_params(axis='x', which='both', colors='C1')
ax2.axvline(20, ls='--', c='C1', lw=3) # z=20 indeed matches x=100 as desired
fig.show()
In the above example the vertical lines demonstrate that first and second axis are indeed shifted to one another as wanted. x = 100 gets shifted to z = 2*x**0.5 = 20. The colours are just to clarify which vertical line goes with which axis.
Don't need to cover them, just Eliminate the ticks!
d= [7,9,14,17,35,70];
j= [100,80,50,40,20,10];
plt.figure()
plt.xscale('log')
plt.plot(freq, freq*spec) #plot some spectrum
ax1 = plt.gca() #define my first axis
ax1.yaxis.set_ticks_position('both')
ax1.tick_params(axis='y',which='both',direction='in');
ax1.tick_params(axis='x',which='both',direction='in');
ax2 = ax1.twiny() #generates second axis (top)
ax2.set_xlim(ax1.get_xlim()); #same limits
plt.xscale('log') #make it log
ax2.set_xticks(freq[d]); #my own 'major' ticks OVERLAPS!!!
ax2.set_xticklabels(j); #change labels
ax2.tick_params(axis='x',which='major',direction='in');
ax2.tick_params(axis='x',which='minor',top=False); #REMOVE 'MINOR' TICKS
ax2.grid()
I think you can fix your issue by calling ax2.set_xscale('log').
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
ax.semilogx(np.logspace(1.0, 5.0, 20), np.random.random([20]))
new_tick_locations = np.array([10., 100., 1000., 1.0e4])
def tick_function(X):
V = X / 1000.
return ["%.3f" % z for z in V]
ax2 = ax.twiny()
ax2.set_xscale('log')
ax2.set_xlim(ax.get_xlim())
ax2.set_xticks(new_tick_locations)
ax2.set_xticklabels(tick_function(new_tick_locations))
ax2.set_xlabel(r"Modified x-axis: $X/1000$")

Add second axis to polar plot

I try to plot two polar plots in one figure. See code below:
fig = super(PlotWindPowerDensity, self).get_figure()
rect = [0.1, 0.1, 0.8, 0.8]
ax = WindSpeedDirectionAxes(fig, rect)
self.values_dict = collections.OrderedDict(sorted(self.values_dict.items()))
values = self.values_dict.items()
di, wpd = zip(*values)
wpd = np.array(wpd).astype(np.double)
wpdmask = np.isfinite(wpd)
theta = self.radar_factory(int(len(wpd)))
# spider plot
ax.plot(theta[wpdmask], wpd[wpdmask], color = 'b', alpha = 0.5)
ax.fill(theta[wpdmask], wpd[wpdmask], facecolor = 'b', alpha = 0.5)
# bar plot
ax.plot_bar(table=self.table, sectors=self.sectors, speedbins=self.wpdbins, option='wind_power_density', colorfn=get_sequential_colors)
fig.add_axes(ax)
return fig
The length of the bar is the data base (how many sampling points for this sector). The colors of the bars show the frequency of certain value bins (eg. 2.5-5 m/s) in the correspondent sector (blue: low, red: high). The blue spider plot shows the mean value for each sector.
In the shown figure, the values of each plot are similar, but this is rare. I need to assign the second plot to another axis and show this axis in another direction.
EDIT:
After the nice answer of Joe, i get the result of the figure.
That's almost everything i wanted to achieve. But there are some points i wasn't able to figure out.
The plot is made for dynamicly changing data bases. Therefore i need a dynamic way to get the same location of the circles. Till now I solve it with:
start, end = ax2.get_ylim()
ax2.yaxis.set_ticks(np.arange(0, end, end / len(ax.yaxis.get_ticklocs())))
means: for second axis i alter the ticks in order to fit the ticklocs to the one's of first axis.
In most cases i get some decimal places, but i don't want that, because it corrupts the clearness of the plot. Is there a way to solve this problem more smartly?
The ytics (the radial one's) range from 0 to the next-to-last circle. How can i achieve that the values range from the first circle to the very last (the border)? The same like for the first axis.
So, as I understand it, you want to display data with very different magnitudes on the same polar plot. Basically you're asking how to do something similar to twinx for polar axes.
As an example to illustrate the problem, it would be nice to display the green series on the plot below at a different scale than the blue series, while keeping them on the same polar axes for easy comparison.:
import numpy as np
import matplotlib.pyplot as plt
numpoints = 30
theta = np.linspace(0, 2*np.pi, numpoints)
r1 = np.random.random(numpoints)
r2 = 5 * np.random.random(numpoints)
params = dict(projection='polar', theta_direction=-1, theta_offset=np.pi/2)
fig, ax = plt.subplots(subplot_kw=params)
ax.fill_between(theta, r2, color='blue', alpha=0.5)
ax.fill_between(theta, r1, color='green', alpha=0.5)
plt.show()
However, ax.twinx() doesn't work for polar plots.
It is possible to work around this, but it's not very straight-forward. Here's an example:
import numpy as np
import matplotlib.pyplot as plt
def main():
numpoints = 30
theta = np.linspace(0, 2*np.pi, numpoints)
r1 = np.random.random(numpoints)
r2 = 5 * np.random.random(numpoints)
params = dict(projection='polar', theta_direction=-1, theta_offset=np.pi/2)
fig, ax = plt.subplots(subplot_kw=params)
ax2 = polar_twin(ax)
ax.fill_between(theta, r2, color='blue', alpha=0.5)
ax2.fill_between(theta, r1, color='green', alpha=0.5)
plt.show()
def polar_twin(ax):
ax2 = ax.figure.add_axes(ax.get_position(), projection='polar',
label='twin', frameon=False,
theta_direction=ax.get_theta_direction(),
theta_offset=ax.get_theta_offset())
ax2.xaxis.set_visible(False)
# There should be a method for this, but there isn't... Pull request?
ax2._r_label_position._t = (22.5 + 180, 0.0)
ax2._r_label_position.invalidate()
# Ensure that original axes tick labels are on top of plots in twinned axes
for label in ax.get_yticklabels():
ax.figure.texts.append(label)
return ax2
main()
That does what we want, but it looks fairly bad at first. One improvement would be to the tick labels to correspond to what we're plotting:
plt.setp(ax2.get_yticklabels(), color='darkgreen')
plt.setp(ax.get_yticklabels(), color='darkblue')
However, we still have the double-grids, which are rather confusing. One easy way around this is to manually set the r-limits (and/or r-ticks) such that the grids will fall on top of each other. Alternately, you could write a custom locator to do this automatically. Let's stick with the simple approach here:
ax.set_rlim([0, 5])
ax2.set_rlim([0, 1])
Caveat: Because shared axes don't work for polar plots, the implmentation I have above will have problems with anything that changes the position of the original axes. For example, adding a colorbar to the figure will cause all sorts of problems. It's possible to work around this, but I've left that part out. If you need it, let me know, and I'll add an example.
At any rate, here's the full, stand-alone code to generate the final figure:
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(1977)
def main():
numpoints = 30
theta = np.linspace(0, 2*np.pi, numpoints)
r1 = np.random.random(numpoints)
r2 = 5 * np.random.random(numpoints)
params = dict(projection='polar', theta_direction=-1, theta_offset=np.pi/2)
fig, ax = plt.subplots(subplot_kw=params)
ax2 = polar_twin(ax)
ax.fill_between(theta, r2, color='blue', alpha=0.5)
ax2.fill_between(theta, r1, color='green', alpha=0.5)
plt.setp(ax2.get_yticklabels(), color='darkgreen')
plt.setp(ax.get_yticklabels(), color='darkblue')
ax.set_ylim([0, 5])
ax2.set_ylim([0, 1])
plt.show()
def polar_twin(ax):
ax2 = ax.figure.add_axes(ax.get_position(), projection='polar',
label='twin', frameon=False,
theta_direction=ax.get_theta_direction(),
theta_offset=ax.get_theta_offset())
ax2.xaxis.set_visible(False)
# There should be a method for this, but there isn't... Pull request?
ax2._r_label_position._t = (22.5 + 180, 0.0)
ax2._r_label_position.invalidate()
# Bit of a hack to ensure that the original axes tick labels are on top of
# whatever is plotted in the twinned axes. Tick labels will be drawn twice.
for label in ax.get_yticklabels():
ax.figure.texts.append(label)
return ax2
if __name__ == '__main__':
main()
Just to add onto #JoeKington 's (great) answer, I found that the "hack to ensure that the original axes tick labels are on top of whatever is plotted in the twinned axes" didn't work for me so as an alternative I've used:
from matplotlib.ticker import MaxNLocator
#Match the tick point locations by setting the same number of ticks in the
# 2nd axis as the first
ax2.yaxis.set_major_locator(MaxNLocator(nbins=len(ax1.get_yticks())))
#Set the last tick as the plot limit
ax2.set_ylim(0, ax2.get_yticks()[-1])
#Remove the tick label at zero
ax2.yaxis.get_major_ticks()[0].label1.set_visible(False)

Categories