Add KDE on to a histogram - python

I would like to add a density plot to my histogram diagram. I know something about pdf function but I've got confused and other similar questions were not helpful.
from scipy.stats import *
from numpy import*
from matplotlib.pyplot import*
from random import*
nums = []
N = 100
for i in range(N):
a = randint(0,9)
nums.append(a)
bars= [0,1,2,3,4,5,6,7,8,9]
alpha, loc, beta=5, 100, 22
hist(nums,normed= True,bins = bars)
show()
I'm looking for something like this

from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(41)
N = 100
x = np.random.randint(0, 9, N)
bins = np.arange(10)
kde = stats.gaussian_kde(x)
xx = np.linspace(0, 9, 1000)
fig, ax = plt.subplots(figsize=(8,6))
ax.hist(x, density=True, bins=bins, alpha=0.3)
ax.plot(xx, kde(xx))

Here's a solution using seaborn 0.11.1 and pandas 1.1.5:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
N = 100
nums = [np.random.randint(i-i, 9) for i in range(N)]
df = pd.DataFrame(nums, columns=["value"])
fig, ax1 = plt.subplots()
sns.kdeplot(data=df, x="value", ax=ax1)
ax1.set_xlim((df["value"].min(), df["value"].max()))
ax2 = ax1.twinx()
sns.histplot(data=df, x="value", discrete=True, ax=ax2)
Note how I use numpy to generate the random values because I need actual values, not generators. The discrete=True in the last line assures that the ticks are centered.

distplot from Seaborn offers histogram plot as well as distribution graph together:
sns.distplot(df)

Related

How to normalize a 2d histogram in python?

I'm trying to plot a 2d histogram. The histogram is basically a galaxy and I have the points of each luminous point. I have plotted the histogram but it's not properly normalized, as the values of the colorbar should go from 0 to 1. How can I fix this?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import kde
fig, axes = plt.subplots(ncols=2, nrows=1, figsize=(20, 8))
data1 = pd.read_csv('mydata.txt', sep='\s+', header=None)
az1 = data1[0]
el1 = data1[1]
nbins = 250
hist1 = axes[0].hist2d(az1, el1, bins=nbins, cmap='magma', density=True)
fig.colorbar(hist1[3], ax = axes)
I tried with the function hist2Dbut I didn't find a way to normalized the result with it. So what I suggest is using the hitrogram from the numpy modul: np.nistogram2d where you can extract the result and then normalized the output before display it.
Here an example with random numbers:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import kde
fig, axes = plt.subplots(ncols=2, nrows=1, figsize=(20, 8))
# data1 = pd.read_csv('mydata.txt', sep='\s+', header=None)
N=10000
az1 = np.random.random(N)
el1 = np.random.random(N)
nbins = 250
hist1 = axes[0].hist2d(az1, el1, bins=nbins, cmap='magma', density=True)
fig.colorbar(hist1[3], ax = axes)
H, xedges, yedges = np.histogram2d(el1, az1, bins=(nbins, nbins),density=True )
# H_normalized = H/float(az1.shape[0]) # the integral over the histogrm is 1
H_normalized = H/H.max((0,1)) # the max value of the histogrm is 1
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
im = axes[1].imshow(H_normalized, extent=extent, cmap='magma', interpolation='none',origin ='lower')
fig.colorbar(im, ax=axes[1])
plt.show()

Filling subplot with colormap - Matplotlib LogNorm does work in python 3 anymore

I had pretty nice plots looking like this created a while ago in python 2.7.
Now it appears that LogNorm does not work anymore.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
fig = plt.figure()
ax = fig.add_subplot(111)
# creating logspaced values for colorbar
x = np.logspace(-8,-3,6)
yarr = np.vstack((x,))
print(yarr)
# check if yarr is really logspaced
ax.plot(yarr, [1e1]*len(yarr), 'w.-')
# fill box with colorbar - this does not work anymore
ax.imshow(yarr, extent=(1e-8, 1e-3, 1, 1e4), norm=LogNorm(vmin=1e-8, vmax=1e-3))
ax.set_xscale("log")
ax.set_yscale("log")
Output now
Thanks in advance.
It was pointed out to me that it is a problem of matplotlib:
https://github.com/matplotlib/matplotlib/issues/7661/
import numpy as np
import matplotlib.pyplot as plt
tmp = np.arange(199).reshape(1, 199)
y = np.logspace(0, -4, 2)
x = np.logspace(-8, -3, 200)
fig, ax = plt.subplots()
ax.set_xscale('log')
ax.set_yscale('log')
ax.pcolormesh(x, y, tmp)
plt.show()
This solves the problem.

Matplotlib not plotting all points

I am trying to plot a 3D-Array in matplotlib, but I only see a linear output. The expected output was a 10x10x10 cube.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
points = np.zeros((10, 10, 10))
for x in range(10):
for y in range(10):
for z in range(10):
points[x][y][z] = z
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(points[:,0],points[:,1],points[:,2])
plt.show()
OK, you were very, very close. I didn't realize how close until I tried it. The problem you had was that you made points a 3D array where each entry had a value. It needed to be a 2D array, 1000 x 3.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
points = []
for x in range(10):
for y in range(10):
for z in range(10):
points.append((x,y,z))
points = np.array(points)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(points[:,0],points[:,1],points[:,2])
plt.show()
You've got a good answer by Tim. However, there are alternatives approaches. For example, there is np.meshgrid() that are often used in your situation to produce and manipulate data. Here is the code to generate array of data and produce sample plot.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
n1 = 10 #number of grid rows/columns
xg, yg = np.meshgrid(np.arange(n1),np.arange(n1))
for i in np.arange(n1):
zg = np.ones(xg.shape) * i
ax.scatter(xg, yg, zg, s=3, c='k')
lim = n1 + 0.1*n1
ax.set_xlim3d(-0.1*n1, lim)
ax.set_ylim3d(-0.1*n1, lim)
ax.set_zlim3d(-0.1*n1, lim)
# set viewing angle
ax.azim = 120 # z rotation (default=270); 160+112
ax.elev = 35 # x rotation (default=0)
ax.dist = 10 # zoom (define perspective)
plt.show()

seaborn histogram to have different color bins

A simple histogram by seaborn. I want to highlight the top 3 bins with a different color. Here shows a matplotlib way, but not a seaborn way.
Is there any ways to show different colored bins in seaborn?
Thank you.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
data = np.random.normal(loc = 6, size=100)
ax = sns.distplot(data, bins = 20)
plt.xlim(0, 10)
plt.show()
If there are no other plots on the same ax, you could loop through all its patches, find the 3 highest and color them:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
data = np.random.normal(loc = 6, size=500)
ax = sns.distplot(data, bins = 20)
heights = [p.get_height() for p in ax.patches]
third_highest = sorted(heights)[-3]
for p in ax.patches:
if p.get_height() >= third_highest:
p.set_color('crimson')
plt.xlim(0, 10)
plt.show()

How to format seaborn/matplotlib axis tick labels from number to thousands or Millions? (125,436 to 125.4K)

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import pandas as pd
sns.set(style="darkgrid")
fig, ax = plt.subplots(figsize=(8, 5))
palette = sns.color_palette("bright", 6)
g = sns.scatterplot(ax=ax, x="Area", y="Rent/Sqft", hue="Region", marker='o', data=df, s=100, palette= palette)
g.legend(bbox_to_anchor=(1, 1), ncol=1)
g.set(xlim = (50000,250000))
How can I can change the axis format from a number to custom format? For example, 125000 to 125.00K
IIUC you can format the xticks and set these:
In[60]:
#generate some psuedo data
df = pd.DataFrame({'num':[50000, 75000, 100000, 125000], 'Rent/Sqft':np.random.randn(4), 'Region':list('abcd')})
df
Out[60]:
num Rent/Sqft Region
0 50000 0.109196 a
1 75000 0.566553 b
2 100000 -0.274064 c
3 125000 -0.636492 d
In[61]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import pandas as pd
sns.set(style="darkgrid")
fig, ax = plt.subplots(figsize=(8, 5))
palette = sns.color_palette("bright", 4)
g = sns.scatterplot(ax=ax, x="num", y="Rent/Sqft", hue="Region", marker='o', data=df, s=100, palette= palette)
g.legend(bbox_to_anchor=(1, 1), ncol=1)
g.set(xlim = (50000,250000))
xlabels = ['{:,.2f}'.format(x) + 'K' for x in g.get_xticks()/1000]
g.set_xticklabels(xlabels)
Out[61]:
The key bit here is this line:
xlabels = ['{:,.2f}'.format(x) + 'K' for x in g.get_xticks()/1000]
g.set_xticklabels(xlabels)
So this divides all the ticks by 1000 and then formats them and sets the xtick labels
UPDATE
Thanks to #ScottBoston who has suggested a better method:
ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.2f}'.format(x/1000) + 'K'))
see the docs
The canonical way of formatting the tick labels in the standard units is to use an EngFormatter. There is also an example in the matplotlib docs.
Also see Tick locating and formatting
Here it might look as follows.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import pandas as pd
df = pd.DataFrame({"xaxs" : np.random.randint(50000,250000, size=20),
"yaxs" : np.random.randint(7,15, size=20),
"col" : np.random.choice(list("ABC"), size=20)})
fig, ax = plt.subplots(figsize=(8, 5))
palette = sns.color_palette("bright", 6)
sns.scatterplot(ax=ax, x="xaxs", y="yaxs", hue="col", data=df,
marker='o', s=100, palette="magma")
ax.legend(bbox_to_anchor=(1, 1), ncol=1)
ax.set(xlim = (50000,250000))
ax.xaxis.set_major_formatter(ticker.EngFormatter())
plt.show()
Using Seaborn without importing matplotlib:
import seaborn as sns
sns.set()
chart = sns.relplot(x="x_val", y="y_val", kind="line", data=my_data)
ticks = chart.axes[0][0].get_xticks()
xlabels = ['$' + '{:,.0f}'.format(x) for x in ticks]
chart.set_xticklabels(xlabels)
chart.fig
Thank you to EdChum's answer above for getting me 90% there.
Here's how I'm solving this: (similar to ScottBoston)
from matplotlib.ticker import FuncFormatter
f = lambda x, pos: f'{x/10**3:,.0f}K'
ax.xaxis.set_major_formatter(FuncFormatter(f))
We could used the APIs: ax.get_xticklabels() , get_text() and ax.set_xticklabels do it.
e.g,
xlabels = ['{:.2f}k'.format(float(x.get_text().replace('−', '-')))/1000 for x in g.get_xticklabels()]
g.set_xticklabels(xlabels)

Categories