I used the following code to create the attached plot:
# Draw the power matrix as a heat map and print every cell's value on top of it.
fig, ax = plt.subplots()
fig.set_figheight(50)
fig.set_figwidth(50)
ax.matshow(power_final_for_plotting, cmap='GnBu', origin='upper')
# NOTE(review): the data values themselves are passed as tick positions here,
# while the annotations below are placed at integer column/row indices; that
# mismatch is the likely cause of the misaligned labels -- confirm on the plot.
ax.set_xticks(time_periods)
ax.set_xticklabels(time_periods)
ax.set_yticks(sig_wave_height)
ax.set_yticklabels(sig_wave_height)
# Annotate cell (row j, column i) with its value rounded to 3 decimals.
for i in range(len(time_periods)):
    # `amplitude` is defined outside this snippet; presumably it has one
    # entry per matrix row -- verify against the caller.
    for j in range(len(amplitude)):
        c = round(power_final_for_plotting[j, i], 3)
        ax.text(i, j, str(c), va='center', ha='center', size=27)
plt.tight_layout()
Here time_periods and sig_wave_height are lists of integers. The axis labels do not align properly with the cells (check the top right of the image to see where the labels end up). How can I fix this? The labels are also really small:
The current tick definition passes the data values themselves as tick positions, so the tick labels only extend up to 15 even though the matrix has 30 columns. We need one tick position per column (30 of them), and the tick names are then assigned to those positions separately; the same applies to the rows. Since no data was provided, I have created suitable sample data. Also, the tick labels look small because the figure is 50 inches in size.
import numpy as np
import matplotlib.pyplot as plt
# Sample data: 15 rows (wave heights) x 30 columns (time periods).
power_final_for_plotting = np.random.rand(450).reshape(15, 30)
time_periods = np.arange(0, 15, 0.5)
sig_wave_height = np.arange(0.5, 8.0, 0.5)

fig, ax = plt.subplots()
fig.set_figheight(6)
fig.set_figwidth(12)
ax.matshow(power_final_for_plotting, cmap='GnBu', origin='upper')

# Put one tick on every column/row index and label it with the matching
# data value, so the labels line up with the cells.
ax.set_xticks(range(len(time_periods)))
ax.set_xticklabels([str(x) for x in time_periods])
ax.set_yticks(range(len(sig_wave_height)))
ax.set_yticklabels([str(x) for x in sig_wave_height])

# Write each value (rounded to 3 decimals) in the middle of its cell;
# column index i is the x coordinate, row index j is the y coordinate.
for (j, i), value in np.ndenumerate(power_final_for_plotting):
    ax.text(i, j, str(round(value, 3)), va='center', ha='center', size=9)

plt.tight_layout()
plt.show()
I am trying to plot either 4 graphs (subplots) of KDE or 1 with 4 lines.
I have two columns:
Region: Charges:
southeast 6000
southeast 5422
southwest 3222
northwest 4222
northwest 5555
northeast 6729
etc 1000s of rows..4 regions
I'd like to visualize the distribution of these 4 areas.
Playing around with this and error messages (and I know it's not correct) 'Data must be 1-dimensional'.
# Attempt to draw one KDE per region on a 2x2 grid of axes sharing the x axis.
# NOTE(review): with two rows and two columns `axes` is a 2D array, so
# `axes[0]` / `axes[1]` below select a whole row of axes rather than a single
# Axes -- confirm; this snippet is the version that raises
# 'Data must be 1-dimensional'.
fig, axes = plt.subplots(2, 2, sharex=True, figsize=(10,5))
fig.suptitle('Bigger 1 row x 2 columns axes with no data')
#axes[0].set_title('Title of the first chart')
# Index the charges by region so that each region's rows can be picked with .loc.
reg_name = df2[['region','charges']].set_index('region')
southeast = reg_name.loc['southeast']
southwest = reg_name.loc['southwest']
northwest = reg_name.loc['northwest']
#c = df2.charges.values
#d = df2.region
# Set the dimensions of the plot
#widthInInches = 10
#heightInInches = 4
#plt.figure( figsize=(widthInInches, heightInInches) )
# Draw histograms and KDEs on the diagonal using seaborn
#if( int(versionStrParts[1]) < 11 ):
# Use the older, now-deprecated form
# ax = sns.distplot(c,
# kde_kws={"label": "Kernel Density", "color" : "black"},
# hist_kws={"label": "Histogram", "color" : 'lightsteelblue'})
#else:
# Use the more recent form
# NOTE(review): each selection above comes from a one-column DataFrame, so
# `.values` is presumably 2-dimensional and `.name` may not exist -- verify.
sns.kdeplot(ax=axes[0], x=southeast.index, y=southeast.values, color="black", label="Kernel Density")
axes[0].set_title(southeast.name)
sns.kdeplot(ax=axes[1], x=southwest.index, y=southwest.values, color="black", label="Kernel Density")
axes[1].set_title(southwest.name)
sns.kdeplot(ax=axes[0,0], data=df2[df2['region']=='southeast'], x='charges', color='k') should work for your data. Note that axes is a 2D array when both the number of rows and columns are larger than 1.
See How to plot a mean line on a distplot between 0 and the y value of the mean? for adding lines for mean, sdev etc..
Instead of doing the kdeplots one by one, sns.displot can draw them in one go (note that displot is different from distplot):
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Reproducible sample data: 100 random-walk "charge" values per region.
np.random.seed(12358)
regions = ['southeast', 'southwest', 'northeast', 'northwest']
df2 = pd.DataFrame({'region': np.repeat(regions, 100),
                    'charge': np.round(np.random.randn(400).cumsum() * 100 + 2000)})

# One KDE subplot per region, laid out two per row.
g = sns.displot(kind='kde', data=df2, x='charge',
                col='region', col_order=regions, col_wrap=2,
                height=4, aspect=3, color='black')

for region, ax in g.axes_dict.items():
    data = df2[df2['region'] == region]['charge'].values
    # The first line on each subplot is the KDE curve; its points are used to
    # cut the vertical markers off at the height of the curve.
    xs, ys = ax.get_lines()[0].get_data()
    median = np.median(data)
    mean = data.mean()
    sdev = data.std()
    marks = [mean - sdev, mean, mean + sdev]
    # Dotted blue: mean and mean +/- one standard deviation.
    ax.vlines(marks, 0, np.interp(marks, xs, ys), color='b', ls=':')
    # Dashed red: median.
    ax.vlines(median, 0, np.interp(median, xs, ys), color='r', ls='--')

plt.tight_layout()
plt.show()
To draw all the regions into one plot, you can use:
# A single wide Axes; `hue='region'` makes seaborn draw one KDE curve per
# distinct value of the 'region' column on it.
fig, ax = plt.subplots(figsize=(12, 4))
sns.kdeplot(data=df2, x='charge', hue='region', ax=ax)
I'd like to draw a lognormal distribution of a given bar plot.
Here's the code
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np; np.random.seed(1)
import scipy.stats as stats
import math
inter = 33  # number of bins per decade
# Three decades (0.01 .. 10) split into 3*inter bins -> 3*inter + 1 bin edges.
x = np.logspace(-2, 1, num=3*inter+1)
# Percentage of particles in each bin (one value per bin).
yaxis = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.01,0.03,0.3,0.75,1.24,1.72,2.2,3.1,3.9,
         4.3,4.9,5.3,5.6,5.87,5.96,6.01,5.83,5.42,4.97,4.60,4.15,3.66,3.07,2.58,2.19,1.90,1.54,1.24,1.08,0.85,0.73,
         0.84,0.59,0.55,0.53,0.48,0.35,0.29,0.15,0.15,0.14,0.12,0.14,0.15,0.05,0.05,0.05,0.04,0.03,0.03,0.03, 0.02,
         0.02,0.03,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0,0]

fig, ax = plt.subplots(figsize=(12, 12))
# `x[:-1]` are the left bin edges and `np.diff(x)` the bin widths, so each bar
# has to be anchored at its edge; centring it would shift it by half a bin.
ax.bar(x[:-1], yaxis, width=np.diff(x), align="edge", ec='k', color='w')
ax.set_xscale('log')
ax.set_xlabel('Diameter (mm)', fontsize=12)
ax.set_ylabel('Percentage of Total Particles (%)', fontsize=12)
ax.set_ylim(0, 8)
ax.set_xlim(0.01, 10)
fig.savefig("Test.png", dpi=300, bbox_inches='tight')
Resulting plot:
What I'm trying to do is to draw the Probability Density Function exactly like the one shown in red in the graph below:
An idea is to convert everything to logspace, with u = log10(x). Then draw the density histogram in there. And also calculate a kde in the same space. Everything gets drawn as y versus u. When we have u at a top twin axes, x can stay at the bottom. Both axes get aligned by setting the same xlims, but converted to logspace on the top axis. The top axis can be hidden to get the desired result.
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
inter = 33  # number of bins per decade
u = np.linspace(-2, 1, num=3*inter+1)  # bin edges in log10 space
x = 10**u  # the same bin edges in linear space
us = np.linspace(u[0], u[-1], 500)  # dense grid in log10 space for the smooth curves
# Percentage of particles in each bin (one value per bin).
yaxis = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.01,0.03,0.3,0.75,1.24,1.72,2.2,3.1,3.9,
                  4.3,4.9,5.3,5.6,5.87,5.96,6.01,5.83,5.42,4.97,4.60,4.15,3.66,3.07,2.58,2.19,1.90,1.54,1.24,1.08,0.85,0.73,
                  0.84,0.59,0.55,0.53,0.48,0.35,0.29,0.15,0.15,0.14,0.12,0.14,0.15,0.05,0.05,0.05,0.04,0.03,0.03,0.03, 0.02,
                  0.02,0.03,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0,0])
u_centers = (u[:-1] + u[1:]) / 2  # bin centres in log10 space

# Reconstruct data from the given frequencies: repeat every bin centre once per
# 0.01 % of particles. Round before converting to int -- plain truncation would
# turn e.g. 0.29 * 100 = 28.999... into 28. (`np.int` no longer exists in
# current NumPy, hence the builtin `int`.)
u_data = np.repeat(u_centers, np.rint(yaxis * 100).astype(int))
kde = stats.gaussian_kde(u_centers, weights=yaxis, bw_method=0.2)
# Total area of all bars; the unit-area density curves are multiplied by this
# so that they are drawn on the same scale as the bars.
total_area = (np.diff(u)*yaxis).sum()

fig, ax = plt.subplots()
ax2 = ax.twiny()  # top axis works in log10 space, bottom axis shows the real diameters
ax2.bar(u[:-1], yaxis, width=np.diff(u), align="edge", ec='k', color='w', label='frequencies')
ax2.plot(us, total_area*kde(us), color='crimson', label='kde')
# A normal distribution in log10 space is a lognormal distribution in linear space.
ax2.plot(us, total_area * stats.norm.pdf(us, u_data.mean(), u_data.std()), color='dodgerblue', label='lognormal')
ax2.legend()
ax.set_xscale('log')
ax.set_xlabel('Diameter (mm)', fontsize='12')
ax.set_ylabel('Percentage of Total Particles (%)', fontsize='12')
ax.set_ylim(0,8)
# Same limits on both axes (converted to log10 on the top one) keeps them aligned.
xlim = np.array([0.01,10])
ax.set_xlim(xlim)
ax2.set_xlim(np.log10(xlim))
ax2.set_xticks([]) # hide the ticks at the top
plt.tight_layout()
plt.show()
PS: Apparently this also can be achieved directly without explicitly using u (at the cost of being slightly more cryptic):
# Variant without the helper variable `u`: work on the real diameters and take
# log10 only where the KDE needs it. Relies on `inter`, `yaxis`, `ax` and the
# `stats` import from the code above.
x = np.logspace(-2, 1, num=3*inter+1)  # bin edges
xs = np.logspace(-2, 1, 500)  # dense grid for the smooth curve
# Total area of all bars (measured in log10 space); the unit-area KDE is
# multiplied by this so that it is drawn on the same scale as the bars.
total_area = (np.diff(np.log10(x))*yaxis).sum()
# `gaussian_kde` lives in scipy.stats, which is imported as `stats`.
kde = stats.gaussian_kde((np.log10(x[:-1])+np.log10(x[1:]))/2, weights=yaxis, bw_method=0.2)
ax.bar(x[:-1], yaxis, width=np.diff(x), align="edge", ec='k', color='w')
ax.plot(xs, total_area*kde(np.log10(xs)), color='crimson')
ax.set_xscale('log')
Note that the bandwidth set for gaussian_kde is a somewhat arbitrary value. Larger values give a smoother curve, while smaller values stay closer to the data. Some experimentation can help.
I am trying to use the polar plot projection to make a radar chart. I would like to know how to draw only one grid line in bold (while the others remain standard).
For my specific case, I would like to highlight the gridline associated to the ytick "0".
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# Variables: one country label and one progress value per row of the CSV.
sespi = pd.read_csv("country_progress.csv")
labels = sespi.country
progress = sespi.progress
# One evenly spaced angle per label; endpoint=False avoids duplicating 0 at 2*pi.
angles = np.linspace(0, 2*np.pi, len(labels), endpoint=False)

# Append the first point again so that the plotted outline closes.
progress = np.concatenate((progress, [progress[0]]))
angles = np.concatenate((angles, [angles[0]]))

# Polar plot
fig = plt.figure()
ax = fig.add_subplot(111, polar=True)
ax.plot(angles, progress, '.--', linewidth=1, c="g")
#ax.fill(angles, progress, alpha=0.25)
# Use the angles without the repeated closing point: there must be exactly one
# tick position per label.
ax.set_thetagrids(np.degrees(angles[:-1]), labels)
# NOTE(review): these labels are assigned to whatever radial ticks matplotlib
# chose automatically -- confirm that their number and positions match.
ax.set_yticklabels([-200,-150,-100,-50,0,50,100,150,200])
#ax.set_title()
ax.grid(True)
plt.show()
The gridlines of a plot are Line2D objects, so you can't make them "bold" as such. What you can do (as shown, in part, in the other answer) is increase the linewidth and change the colour; rather than plotting a new line, you can apply this directly to the gridline in question.
You first need to find the index of the y tick labels which you want to change:
# The tick labels, in the same order as the ticks they are assigned to.
y_tick_labels = [-100,-10,0,10]
ind = y_tick_labels.index(0) # find index of value 0
You can then get a list of the gridlines using gridlines = ax.yaxis.get_gridlines(). Then use the index you found previously on this list to change the properties of the correct gridline.
Using the example from the gallery as a basis, a full example is shown below:
# Sample spiral data taken over from the gallery example (not drawn here).
r = np.arange(0, 2, 0.01)
theta = 2 * np.pi * r

ax = plt.subplot(111, projection='polar')
ax.set_rmax(2)
# Use fewer radial ticks and move their labels away from the 0-degree line.
ax.set_rticks([0.5, 1, 1.5, 2])
ax.set_rlabel_position(-22.5)
ax.grid(True)

# Relabel the four radial ticks.
y_tick_labels = [-100, -10, 0, 10]
ax.set_yticklabels(y_tick_labels)

# The gridlines come in the same order as the ticks, so the position of the
# label 0 in the list also identifies the gridline to emphasise.
ind = y_tick_labels.index(0)
gridlines = ax.yaxis.get_gridlines()
plt.setp(gridlines[ind], color="k", linewidth=2.5)

plt.show()
Which gives:
It is just a trick, but I guess you could just plot a circle and change its linewidth and color to whatever could be bold for you.
For example:
import matplotlib.pyplot as plt
import numpy as np
# Radial value at which the emphasised circle is drawn, and the number of
# points used to draw it.
Yline = 0
Npoints = 300

# Angles from 0 to 360 degrees, converted to radians.
angles = np.linspace(0, 360, Npoints) * np.pi / 180
# Constant radius for every angle, i.e. a circle.
line = np.full_like(angles, Yline)

ax = plt.subplot(111, projection='polar')
ax.plot(angles, line, color='k', linewidth=3)
ax.set_ylim([-1, 1])
ax.grid(True)
plt.show()
In this piece of code, I plot a line using plt.plot between any point of the two vectors angles and line. The former is actually all the angles between 0 and 2*np.pi. The latter is constant, and equal to the 'height' you want to plot that line Yline.
I suggest you try decreasing and increasing Npoints while having a look at the documentation of np.linspace() in order to understand how it affects the roundness of the circle.
I have two sets of different sizes that I'd like to plot on the same histogram. However, since one set has ~330,000 values and the other has about ~16,000 values, their frequency histograms are hard to compare. I'd like to plot a histogram comparing the two sets such that the y-axis is the % of occurrences in that bin. My code below gets close to this, except that rather than having the individual bin values sum to 1.0, the integral of the histogram sums to 1.0 (this is because of the normed=True parameter).
How can I achieve my goal? I've already tried manually calculating the % frequency and using plt.bar() but rather than overlaying the plots, the plots are compared side by side. I want to keep the effect of having the alpha=0.5
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Close any figures that are still open before plotting.
if plt.get_fignums():
    plt.close('all')

# read_csv already returns DataFrames, so no extra wrapping is needed.
electric_df = pd.read_csv('electric.tsv', sep='\t')
gas_df = pd.read_csv('gas.tsv', sep='\t')

# Scale the average daily use to a 30-day period.
electric = electric_df['avg_daily']*30
gas = gas_df['avg_daily']*30

## Create a plot for NGMA gas usage
plt.figure("Usage Comparison")

# Each observation weighs 1/N, so the weights of one data set sum to 1.
weights_electric = np.ones_like(electric)/float(len(electric))
weights_gas = np.ones_like(gas)/float(len(gas))

bins = np.linspace(0, 200, num=50)

# `density=True` is the current name of the former `normed=True` option
# (removed in Matplotlib 3.1): it rescales each histogram so that its
# integral, not the sum of its bin values, equals 1.
n, bins, rectangles = plt.hist(electric, bins, alpha=0.5, label='electric usage', density=True, weights=weights_electric)
plt.hist(gas, bins, alpha=0.5, label='gas usage', density=True, weights=weights_gas)
plt.legend(loc='upper right')
plt.xlabel('Average 30 day use in therms')
plt.ylabel('% of customers')
plt.title('NGMA Customer Usage Comparison')
plt.show()
It sounds like you don't want the normed/density kwarg in this case. You're already using weights. If you multiply your weights by 100 and leave out the normed=True option, you should get exactly what you had in mind.
For example:
import matplotlib.pyplot as plt
import numpy as np
# Two reproducible samples of very different size.
np.random.seed(1)
x = np.random.normal(5, 2, 10000)
y = np.random.normal(2, 1, 3000000)

# Every observation contributes 100/N, so the bars of each data set add up to 100 (%).
xweights = np.full_like(x, 100 / x.size)
yweights = np.full_like(y, 100 / y.size)

fig, ax = plt.subplots()
# Overlay both histograms half-transparently on the same axes.
for sample, sample_weights, colour in ((x, xweights, 'lightblue'),
                                       (y, yweights, 'salmon')):
    ax.hist(sample, weights=sample_weights, color=colour, alpha=0.5)

ax.set_title('Histogram Comparison')
ax.set_ylabel('% of Dataset in Bin')
ax.margins(0.05)
ax.set_ylim(bottom=0)
plt.show()
On the other hand, what you're currently doing (weights and normed) would result in (note the units on the y-axis):
import matplotlib.pyplot as plt
import numpy as np
# Two reproducible samples of very different size.
np.random.seed(1)
x = np.random.normal(5, 2, 10000)
y = np.random.normal(2, 1, 3000000)

# Every observation contributes 100/N.
xweights = 100 * np.ones_like(x) / x.size
yweights = 100 * np.ones_like(y) / y.size

fig, ax = plt.subplots()
# `density=True` is the current name of the former `normed=True` option
# (removed in Matplotlib 3.1). Combined with the weights it rescales the bars
# so that the area of each histogram is 1 -- the y-axis no longer shows
# percentages, which is the effect this example demonstrates.
ax.hist(x, weights=xweights, color='lightblue', alpha=0.5, density=True)
ax.hist(y, weights=yweights, color='salmon', alpha=0.5, density=True)
ax.set(title='Histogram Comparison', ylabel='% of Dataset in Bin')
ax.margins(0.05)
ax.set_ylim(bottom=0)
plt.show()