Plot a histogram using Python with manual bins - python

I am trying to plot a histogram using the matplotlib.hist() function.
Below code is not getting the correct histogram
X axis is the years (age), X axis I want to have 0 20, 25, 30, 35, 40, 45, 50, 55
Y axis is the probability
Someone can help me to get the correct histogram ?
import matplotlib.pyplot as plt
list_age = ['26','28','26','36','38','31','22','31','25','30','37','27','27','29','27','21','27','38','31','41','28','31','28','33','26','39','37','24','31','34','39','33','22', '30','24','29','28','34','27','28','26','26','25','40','24','37','24','28','26','29','26','31','23','31','36','32','25','31','25','33','36','27','28',
'25','27','39','36','30','31','34','23','31','32','31','33','32','39','35','35','22','34','25','35','35','41','20','21','35','32','30','22','21','23','33','25','30','24','39','24','27','22','33','30','27','30','23','29','30','22','31','29','31','24','29','25','24','26','29','31','24','32','21','25','29','30']
list_age.sort()
bins = 55
plt.hist(list_age, bins, facecolor='g')
plt.xlabel('Years')
plt.ylabel('Probability')
plt.grid(True)
plt.show()

You need to convert your list_age to a list of integers first (instead of a list of strings).
Then, simply use the options density (or normed) to display probability and xticks to change the ticks for the x-axis.
import matplotlib.pyplot as plt
list_age = ['26','28','26','36','38','31','22','31','25','30','37','27','27','29','27','21','27','38','31','41','28','31','28','33','26','39','37','24','31','34','39','33','22', '30','24','29','28','34','27','28','26','26','25','40','24','37','24','28','26','29','26','31','23','31','36','32','25','31','25','33','36','27','28',
'25','27','39','36','30','31','34','23','31','32','31','33','32','39','35','35','22','34','25','35','35','41','20','21','35','32','30','22','21','23','33','25','30','24','39','24','27','22','33','30','27','30','23','29','30','22','31','29','31','24','29','25','24','26','29','31','24','32','21','25','29','30']
list_age = [ int(i) for i in list_age ]
bins = len(set(list_age))
plt.hist(list_age, bins = bins, density = True, facecolor = "g") # Replace density by normed if older version of matplotlib
plt.xticks(range(0, 55, 5))
plt.xlabel('Years')
plt.ylabel('Probability')
plt.grid(True)
plt.show()
If you want to display the bars at specific bins, simply define bins at their coordinates:
plt.hist(list_age, bins = [ 0, 20, 25, 30, 35, 40, 45, 50, 55 ], density = True, facecolor = "g")

Related

How do I fit a line in a histogram plot and measure the slope?

Good day. Please I need help fitting a line on a portion of the plots. I want a line fitted on the steep towards the right.
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1,2,figsize =(12,5))
ax[0].hist(pfd.peak_flux.loc[pfd.peak_flux<=1], bins = 30, color='grey', histtype = 'step')
ax[0].set_xlabel('Peak flux density')
ax[0].set_ylabel(r'$log N$')
ax[0].set_yscale('log')
ax[0].set_xscale('log')
ax[1].hist(pfd.int_flux.loc[pfd.int_flux<=1], bins = 30, color='grey', histtype = 'step')
ax[1].set_xlabel('Integrated flux density')
ax[1].set_ylabel(r'$log N$')
ax[1].set_yscale('log')
ax[1].set_xscale('log')
#ax[1].set_ylabel('counts')
#plt.title(r'MGPS positional error for $\leq 2$ arc-seconds')
plt.savefig('log-fluxes.png', dpi = 700, bbox_inches='tight', pad_inches = 0)
Here is how the plot looks?
You can get the data points of histogram as bellow :
counts, bins, bars =ax[0].hist(np.random.randn(1000,1), bins = 30, color='grey', histtype = 'step')
counts is y axis and bins is x axis. Use regression from scipy to get the slop of last n points.
from scipy.stats import linregress
Regression=linregress(counts[-15:], bins[-15:])
slope=Regression.slope

Single stacked bar chart with custom gradient coloring

Here's what I came up with by plotting thick line segments.
The coloration is blue, with varying alpha, 0 < alpha < 1.
My workaround doens't work as I'd like because I don't have a legend (I want a legend that shows a gradient of the blue at varying alpha).
Additionally, I've found that matplotlib scales funny. There should be no overlap of the bars, but if I adjust the window size, the gap between the line segments will change.This is the same figure as the earlier one, just after I've resized the figure window with my mouse.
I'm not sure if there's a better way to go about accomplishing this, or if there's a different package I can use.
Here's the snippet of code that I'm using.
import matplotlib.pyplot as plt
x1 =[0, 19, 39, 46, 60, 79]
x2 = [19, 39, 46, 60, 79, 90]
alpha_list = [-0.8402, -0.6652, 0.0, -0.5106, -0.8074, 0.0]
plt.figure()
for idx,x in enumerate(x1):
plt.plot([x1[idx],x2[idx]],[0,0],color = 'blue',alpha=alpha_list[idx],linewidth =20)
plt.show()
I suppose alpha is just a workaround for using different shades of blue? In that case the Blues colormap can be used instead.
Several lines can be plotted using a LineCollection.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
x1 =[0, 19, 39, 46, 60, 79]
x2 = [19, 39, 46, 60, 79, 90]
alpha_list = [-0.8402, -0.6652, 0.0, -0.5106, -0.8074, 0.0]
verts = np.dstack((np.c_[x1, x2], np.zeros((len(x1), 2))))
fig, ax = plt.subplots()
lc = LineCollection(verts, linewidth=40, cmap="Blues_r", array=np.array(alpha_list))
ax.add_collection(lc)
ax.autoscale()
ax.set_ylim(-1,1)
fig.colorbar(lc)
plt.show()
I think a workaround would be to use plt.barh. Here is an example using normalized color maps. Each color gets converted to RGBA before it can be passed to plt.barh.
import matplotlib.pyplot as plt
from matplotlib import colors
import matplotlib.cm as cmx
x1 =[0, 19, 39, 46, 60, 79]
x2 = [19, 39, 46, 60, 79, 90]
values = range(len(x1))
jet = cm = plt.get_cmap('jet')
cNorm = colors.Normalize(vmin=0, vmax=values[-1])
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
fig, ax = plt.subplots()
for idx, x, y in zip(values,x1, x2):
colorVal = scalarMap.to_rgba(values[idx])
start = x
end = y
width=end-start
ax.barh(y = 0, width = width, left=start, height = 0.1, label = str(idx), color=colorVal)
ax.set_ylim(-.5,0.5)
ax.legend()
which returns:
If you really want to just change the alpha transparency of a single color, you would just have to input alpha_list[idx] for the last element to the RGBA tuple colorVal. For some reason, RGBA did not like negative alpha values, so notice I changed them all to positive
fig, ax = plt.subplots()
alpha_list = [0.8402, 0.6652, 0.01, 0.5106, 0.8074, 0.0]
for idx, x, y in zip(values,x1, x2):
colorVal = (0.0, 0.3, 1.0, alpha_list[idx])
start = x
end = y
width=end-start
ax.barh(y = 0, width = width, left=start, height = 0.1, label = str(idx), color=colorVal)
ax.set_ylim(-.5,0.5)
ax.legend()

matplotlib colorbar boundaries do not implemented

I am trying to create several plots all with the same colorbar limits in a loop.
I set the limits of the contour plot with map.contourf(x, y, U_10m, vmin=0, vmax=25) and this seems to give consistent colour scales for each plot. However, when I use cbar = plt.colorbar(boundaries=np.linspace(0,1,25), ticks=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]) # sets all cbar to same limits each plot does not have the same colorbar limits (examples of two plots with different colorbars and code below).
from netCDF4 import Dataset as NetCDFFile
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np
def wrf_tseries_contour_plotter (
ncfile, time_ind, lowerllat, upperrlat, lowerllon, upperrlon, output_dir):
'''
EDITED FROM http://www.atmos.washington.edu/~ovens/wrfwinds.html
'''
print 'timestep:', + time_ind
#which figure is being generated 0 = 00:00, 144 = 23:50
nc = NetCDFFile(ncfile, 'r')
#
# get the actual longitudes, latitudes, and corners
lons = nc.variables['XLONG'][time_ind]
lats = nc.variables['XLAT'][time_ind]
#get the u10 to plot as a contour instead of t2m
U10_raw = nc.variables['U10'][time_ind] #61 is the index for 10:00am
V10_raw = nc.variables['V10'][time_ind]
#bodge to calculate U from U and V (u10 = sqrt(u^2+v^2))
v2 = np.square(V10_raw)
u2 = np.square(U10_raw)
U_10m = np.sqrt(u2 + v2)
# Make map
map = Basemap(projection='cyl',llcrnrlat=lowerllat,urcrnrlat=upperrlat,
llcrnrlon=lowerllon,urcrnrlon=upperrlon,
resolution='h')
# lllat, urlat,lllon, urlon set outside of f(x) lower left and
# upper right lat/lon for basemap axis limits
x, y = map(lons[:,:], lats[:,:])
map.contourf(x, y, U_10m, vmin=0, vmax=25)
map.drawcoastlines(linewidth = 0.5, color = '0.15')
#thinner lines for larger scale map
#plt.clim(0, 25) #added
cbar = plt.colorbar(boundaries=np.linspace(0,1,25), ticks=[0, 2, 4, 6,
8, 10, 12, 14, 16, 18, 20, 22, 24]) # sets all cbar to same limits
cbar.set_label('10m U (m/s)', size=12)
cbar.ax.set_yticklabels([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24])
#cbar.set_clim(0, 25)
time_str = str(time_ind)
plt.title('gust 20070724' + '_' + time_str)
fig_name = '\gust20070724_'+ time_str + '.png'
plt.savefig(output_dir + fig_name)
plt.close()
#set inputs for wrf_tseries_contour_plotter(ncfile, time_ind, lllat, urlat,
lllon, urlon, output_dir)
ncfile = 'E:\WRFout_UK2Fino\wrfout_d03_2007-07-24_00%3A00%3A00'
tlist = np.arange(0,145)
#set the lower left/upper right lat/lon for axis limits on the maps
lowerllat=48
upperrlat=63
lowerllon=-10
upperrlon=25
#set output directory for figures
output_dir = '''C:\cbar_test'''
for time_ind in tlist:
wrf_tseries_contour_plotter(ncfile, time_ind, lowerllat, upperrlat,
lowerllon, upperrlon, output_dir)
You have to use vmin and vmax values to set boundaries of a colorbar like in this example:
import numpy as np
import matplotlib.cm as cm
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
# test data
x = np.linspace(0,15,100)
X,Y = np.meshgrid(x,x)
SPD1 = np.sqrt(X*X + Y*Y)
SPD2 = SPD1 * 1.3
fig = plt.figure()
# implement boundaries of colorbar and it ticks
vmin, vmax = 0, 26
levels = np.linspace(vmin,vmax,14)
# 1st subplot
ax1 = fig.add_subplot(221)
# Set contour levels and limits
CF1 = ax1.contourf(X, Y, SPD1, levels=levels, vmax=vmax, vmin=vmin)
cbar = plt.colorbar(CF1)
cbar.set_label('10m U (m/s)', size=12)
#2nd subplot
ax1 = fig.add_subplot(222)
CF1 = ax1.contourf(X, Y, SPD2, levels=levels, vmax=vmax, vmin=vmin)
cbar = plt.colorbar(CF1)
cbar.set_label('10m U (m/s)', size=12)
plt.tight_layout()
plt.show()
However you have to select vmin, vmax correctly because of if your values are outside boundaries of colorbar they will not shown (right upper corner of 2nd subplot).

Matplotlib Histogram not equal data sets

I would like to create a histogram that will use the following. I know this is because lengths of my menMeans and womenMeans are not equal. If I was not hard coding the list, and possible wanted to add some more list later to provide more bars how would I do this? What is the best way to scale the graph with knowing that the bars will not always have like sets of values.
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
## the data
N = 5
menMeans = [18, 35, 30, 35, 27] ### len=5
womenMeans = [25, 32, 34, 20, 25,42] ### len =6
## necessary variables
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars
## the bars
rects1 = ax.bar(ind, menMeans, width,
color='black')
rects2 = ax.bar(ind+width, womenMeans, width,
color='red')
# axes and labels
ax.set_xlim(-width,len(ind)+width)
ax.set_ylim(0,45)
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
xTickMarks = ['Group'+str(i) for i in range(1,7)]
ax.set_xticks(ind+width)
xtickNames = ax.set_xticklabels(xTickMarks)
plt.setp(xtickNames, rotation=45, fontsize=10)
## add a legend
ax.legend( (rects1[0], rects2[0]), ('Men', 'Women') )
plt.show()
The error I get is:
Traceback (most recent call last):
File "C:\Python27\test_3.py", line 22, in <module>
color='red')
File "C:\Python27\lib\site-packages\matplotlib\axes.py", line 4999, in bar
nbars)
AssertionError: incompatible sizes: argument 'height' must be length 5 or scalar
I think the easiest way to address this would be to add one or more zero means to the men's list until it's the same length as the other one. The extra zero means don't change the appearance of the graph - it just looks like the bar is absent:
Here's a simple general function to do that:
def equalizeLists(*lists):
maxLen = max([len(list) for list in lists])
for list in lists:
list = list.extend([0]*(maxLen - len(list)))
return maxLen
This will equalize the lengths of two or more lists automatically by adding zeros to the ends of the shorter ones. You could insert it into your code like so:
## the data
menMeans = [18, 35, 30, 35, 27]
womenMeans = [25, 32, 34, 20, 25,42]
N = equalizeLists(menMeans, womenMeans)

How to plot bar graphs with same X coordinates side by side ('dodged')

import matplotlib.pyplot as plt
gridnumber = range(1,4)
b1 = plt.bar(gridnumber, [0.2, 0.3, 0.1], width=0.4,
label="Bar 1", align="center")
b2 = plt.bar(gridnumber, [0.3, 0.2, 0.2], color="red", width=0.4,
label="Bar 2", align="center")
plt.ylim([0,0.5])
plt.xlim([0,4])
plt.xticks(gridnumber)
plt.legend()
plt.show()
Currently b1 and b2 overlap each other. How do I plot them separately like so:
There is an example in the matplotlib site. Basically, you just shift the x values by width. Here is the relevant bit:
import numpy as np
import matplotlib.pyplot as plt
N = 5
menMeans = (20, 35, 30, 35, 27)
menStd = (2, 3, 4, 1, 2)
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars
fig = plt.figure()
ax = fig.add_subplot(111)
rects1 = ax.bar(ind, menMeans, width, color='royalblue', yerr=menStd)
womenMeans = (25, 32, 34, 20, 25)
womenStd = (3, 5, 2, 3, 3)
rects2 = ax.bar(ind+width, womenMeans, width, color='seagreen', yerr=womenStd)
# add some
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(ind + width / 2)
ax.set_xticklabels( ('G1', 'G2', 'G3', 'G4', 'G5') )
ax.legend( (rects1[0], rects2[0]), ('Men', 'Women') )
plt.show()
Below answer will explain each and every line of code in the simplest manner possible:
# Numbers of pairs of bars you want
N = 3
# Data on X-axis
# Specify the values of blue bars (height)
blue_bar = (23, 25, 17)
# Specify the values of orange bars (height)
orange_bar = (19, 18, 14)
# Position of bars on x-axis
ind = np.arange(N)
# Figure size
plt.figure(figsize=(10,5))
# Width of a bar
width = 0.3
# Plotting
plt.bar(ind, blue_bar , width, label='Blue bar label')
plt.bar(ind + width, orange_bar, width, label='Orange bar label')
plt.xlabel('Here goes x-axis label')
plt.ylabel('Here goes y-axis label')
plt.title('Here goes title of the plot')
# xticks()
# First argument - A list of positions at which ticks should be placed
# Second argument - A list of labels to place at the given locations
plt.xticks(ind + width / 2, ('Xtick1', 'Xtick3', 'Xtick3'))
# Finding the best position for legends and putting it
plt.legend(loc='best')
plt.show()
Sometimes could be tricky to find the right bar width. I usually use this np.diff to find the right dimension.
import numpy as np
import matplotlib.pyplot as plt
#The data
womenMeans = (25, 32, 34, 20, 25)
menMeans = (20, 35, 30, 35, 27)
indices = [5.5,6,7,8.5,8.9]
#Calculate optimal width
width = np.min(np.diff(indices))/3
fig = plt.figure()
ax = fig.add_subplot(111)
# matplotlib 3.0 you have to use align
ax.bar(indices-width,womenMeans,width,color='b',label='-Ymin',align='edge')
ax.bar(indices,menMeans,width,color='r',label='Ymax',align='edge')
ax.set_xlabel('Test histogram')
plt.show()
# matplotlib 2.0 (you could avoid using align)
# ax.bar(indices-width,womenMeans,width,color='b',label='-Ymin')
# ax.bar(indices,menMeans,width,color='r',label='Ymax')
This is the result:
What if my indices on my x axis are nominal values like names:
#
import numpy as np
import matplotlib.pyplot as plt
# The data
womenMeans = (25, 32, 34, 20, 25)
menMeans = (20, 35, 30, 35, 27)
indices = range(len(womenMeans))
names = ['Asian','European','North Amercian','African','Austrailian','Martian']
# Calculate optimal width
width = np.min(np.diff(indices))/3.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.bar(indices-width/2.,womenMeans,width,color='b',label='-Ymin')
ax.bar(indices+width/2.,menMeans,width,color='r',label='Ymax')
#tiks = ax.get_xticks().tolist()
ax.axes.set_xticklabels(names)
ax.set_xlabel('Test histogram')
plt.show()
Here are two examples of creating a side-by-side bar chart when you have more than two "categories" in a group.
Manual Method
Manually set the position and width of each bar.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker
coins = ['penny', 'nickle', 'dime', 'quarter']
worth = np.array([.01, .05, .10, .25])
# Coin values times *n* coins
# This controls how many bars we get in each group
values = [worth*i for i in range(1,6)]
n = len(values) # Number of bars to plot
w = .15 # With of each column
x = np.arange(0, len(coins)) # Center position of group on x axis
for i, value in enumerate(values):
position = x + (w*(1-n)/2) + i*w
plt.bar(position, value, width=w, label=f'{i+1}x')
plt.xticks(x, coins);
plt.ylabel('Monetary Value')
plt.gca().yaxis.set_major_formatter(ticker.FormatStrFormatter('$%.2f'))
plt.legend()
Pandas Method
If you put the data into a pandas DataFrame, pandas will do the hard stuff for you.
import pandas as pd
coins = ['penny', 'nickle', 'dime', 'quarter']
worth = [0.01, 0.05, 0.10, 0.25]
df = pd.DataFrame(worth, columns=['1x'], index=coins)
df['2x'] = df['1x'] * 2
df['3x'] = df['1x'] * 3
df['4x'] = df['1x'] * 4
df['5x'] = df['1x'] * 5
from matplotlib import ticker
import matplotlib.pyplot as plt
df.plot(kind='bar')
plt.ylabel('Monetary Value')
plt.gca().yaxis.set_major_formatter(ticker.FormatStrFormatter('$%.2f'))
plt.gca().xaxis.set_tick_params(rotation=0)
Pandas creates a similar figure...

Categories