Python, Matplotlib: Drawing vertical lines in 3d plot, when data is independent

Python, Matplotlib: Drawing vertical lines in 3d plot, when data is independent - python

I have a random walker in the (x,y) plane and a -log(bivariate gaussian) in the (x,y,z) plane. These two datasets are essentially independent.
I want to sample, say 5 (x,y) pairs of the random walker and draw vertical lines up the z-axis and terminate the vertical line when it "meets" the bivariate gaussian.
This is my code so far:
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import seaborn as sns
import scipy
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.mlab import bivariate_normal
%matplotlib inline
# Data for random walk
def randomwalk():
mpl.rcParams['legend.fontsize'] = 10
xyz = []
cur = [0, 0]
for _ in range(40):
axis = random.randrange(0, 2)
cur[axis] += random.choice([-1, 1])
xyz.append(cur[:])
# Get density
x, y = zip(*xyz)
data = np.vstack([x,y])
kde = scipy.stats.gaussian_kde(data)
density = kde(data)
# Data for bivariate gaussian
a = np.linspace(-7.5, 7.5, 40)
b = a
X,Y = np.meshgrid(a, b)
Z = bivariate_normal(X, Y)
surprise_Z = -np.log(Z)
# Get random points from walker and plot up z-axis to the gaussian
M = data[:,np.random.choice(20,5)].T
# Plot figure
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')
ax.plot(x, y, 'grey', label='Random walk') # Walker
ax.scatter(x[-1], y[-1], c='k', marker='o') # End point
ax.legend()
surf = ax.plot_surface(X, Y, surprise_Z, rstride=1, cstride=1,
cmap = plt.cm.gist_heat_r, alpha=0.1, linewidth=0.1)
#fig.colorbar(surf, shrink=0.5, aspect=7, cmap=plt.cm.gray_r)
for i in range(5):
ax.plot([M[i,0], M[i,0]],[M[i,1], M[i,1]], [0,10],'k--',alpha=0.8, linewidth=0.5)
ax.set_zlim(0, 50)
ax.set_xlim(-10, 10)
ax.set_ylim(-10, 10)
Which produces
As you can see the only thing I'm struggling with is how to terminate the vertical lines when they meet the appropriate Z-value. Any ideas are welcome!

You're currently only letting those lines get to a height of 10 by using [0,10] as the z coordinates. You can change your loop to the following:
for i in range(5):
x = [M[i,0], M[i,0]]
y = [M[i,1], M[i,1]]
z = [0,-np.log(bivariate_normal(M[i,0],M[i,1]))]
ax.plot(x,y,z,'k--',alpha=0.8, linewidth=0.5)
This takes the x and y coordinates for each point you loop over and calculates the height of overlying Gaussian for that point and plots to there. Here is a plot with the linestyle changed to emphasize the lines relevant to the question:

Related

How to create a multi-colored curve in 3d?

I'm trying to plot a 3d curve that has different colors depending on one of its parameters. I tried this method similar to this question, but it doesn't work. Can anyone point me in the right direction?
import matplotlib.pyplot as plt
from matplotlib import cm
T=100
N=5*T
x=np.linspace(0,T,num=N)
y=np.cos(np.linspace(0,T,num=N))
z=np.sin(np.linspace(0,T,num=N))
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot(x,y,z,cmap = cm.get_cmap("Spectral"),c=z)
plt.show()

To extend the approach in this tutorial to 3D, use x,y,z instead of x,y.
The desired shape for the segments is (number of segments, 2 points, 3 coordinates per point), so N-1,2,3. First the array of points is created with shape N, 3. Then start (xyz[:-1, :]) and end points (xyz[1:, :]) are stacked together.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Line3DCollection
T = 100
N = 5 * T
x = np.linspace(0, T, num=N)
y = np.cos(np.linspace(0, T, num=N))
z = np.sin(np.linspace(0, T, num=N))
xyz = np.array([x, y, z]).T
segments = np.stack([xyz[:-1, :], xyz[1:, :]], axis=1) # shape is 499,2,3
cmap = plt.cm.get_cmap("Spectral")
norm = plt.Normalize(z.min(), z.max())
lc = Line3DCollection(segments, linewidths=2, colors=cmap(norm(z[:-1])))
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.add_collection(lc)
ax.set_xlim(-10, 110)
ax.set_ylim(-1.1, 1.1)
ax.set_zlim(-1.1, 1.1)
plt.show()

Plotting scatter density plots in python [duplicate]

I'd like to make a scatter plot where each point is colored by the spatial density of nearby points.
I've come across a very similar question, which shows an example of this using R:
R Scatter Plot: symbol color represents number of overlapping points
What's the best way to accomplish something similar in python using matplotlib?

In addition to hist2d or hexbin as #askewchan suggested, you can use the same method that the accepted answer in the question you linked to uses.
If you want to do that:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=100)
plt.show()
If you'd like the points to be plotted in order of density so that the densest points are always on top (similar to the linked example), just sort them by the z-values. I'm also going to use a smaller marker size here as it looks a bit better:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=50)
plt.show()

Plotting >100k data points?
The accepted answer, using gaussian_kde() will take a lot of time. On my machine, 100k rows took about 11 minutes. Here I will add two alternative methods (mpl-scatter-density and datashader) and compare the given answers with same dataset.
In the following, I used a test data set of 100k rows:
import matplotlib.pyplot as plt
import numpy as np
# Fake data for testing
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
Output & computation time comparison
Below is a comparison of different methods.
1: mpl-scatter-density
Installation
pip install mpl-scatter-density
Example code
import mpl_scatter_density # adds projection='scatter_density'
from matplotlib.colors import LinearSegmentedColormap
# "Viridis-like" colormap with white background
white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
(0, '#ffffff'),
(1e-20, '#440053'),
(0.2, '#404388'),
(0.4, '#2a788e'),
(0.6, '#21a784'),
(0.8, '#78d151'),
(1, '#fde624'),
], N=256)
def using_mpl_scatter_density(fig, x, y):
ax = fig.add_subplot(1, 1, 1, projection='scatter_density')
density = ax.scatter_density(x, y, cmap=white_viridis)
fig.colorbar(density, label='Number of points per pixel')
fig = plt.figure()
using_mpl_scatter_density(fig, x, y)
plt.show()
Drawing this took 0.05 seconds:
And the zoom-in looks quite nice:
2: datashader
Datashader is an interesting project. It has added support for matplotlib in datashader 0.12.
Installation
pip install datashader
Code (source & parameterer listing for dsshow):
import datashader as ds
from datashader.mpl_ext import dsshow
import pandas as pd
def using_datashader(ax, x, y):
df = pd.DataFrame(dict(x=x, y=y))
dsartist = dsshow(
df,
ds.Point("x", "y"),
ds.count(),
vmin=0,
vmax=35,
norm="linear",
aspect="auto",
ax=ax,
)
plt.colorbar(dsartist)
fig, ax = plt.subplots()
using_datashader(ax, x, y)
plt.show()
It took 0.83 s to draw this:
There is also possibility to colorize by third variable. The third parameter for dsshow controls the coloring. See more examples here and the source for dsshow here.
3: scatter_with_gaussian_kde
def scatter_with_gaussian_kde(ax, x, y):
# https://stackoverflow.com/a/20107592/3015186
# Answer by Joel Kington
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
ax.scatter(x, y, c=z, s=100, edgecolor='')
It took 11 minutes to draw this:
4: using_hist2d
import matplotlib.pyplot as plt
def using_hist2d(ax, x, y, bins=(50, 50)):
# https://stackoverflow.com/a/20105673/3015186
# Answer by askewchan
ax.hist2d(x, y, bins, cmap=plt.cm.jet)
It took 0.021 s to draw this bins=(50,50):
It took 0.173 s to draw this bins=(1000,1000):
Cons: The zoomed-in data does not look as good as in with mpl-scatter-density or datashader. Also you have to determine the number of bins yourself.
5: density_scatter
The code is as in the answer by Guillaume.
It took 0.073 s to draw this with bins=(50,50):
It took 0.368 s to draw this with bins=(1000,1000):

Also, if the number of point makes KDE calculation too slow, color can be interpolated in np.histogram2d [Update in response to comments: If you wish to show the colorbar, use plt.scatter() instead of ax.scatter() followed by plt.colorbar()]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import interpn
def density_scatter( x , y, ax = None, sort = True, bins = 20, **kwargs ) :
"""
Scatter plot colored by 2d histogram
"""
if ax is None :
fig , ax = plt.subplots()
data , x_e, y_e = np.histogram2d( x, y, bins = bins, density = True )
z = interpn( ( 0.5*(x_e[1:] + x_e[:-1]) , 0.5*(y_e[1:]+y_e[:-1]) ) , data , np.vstack([x,y]).T , method = "splinef2d", bounds_error = False)
#To be sure to plot all data
z[np.where(np.isnan(z))] = 0.0
# Sort the points by density, so that the densest points are plotted last
if sort :
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
ax.scatter( x, y, c=z, **kwargs )
norm = Normalize(vmin = np.min(z), vmax = np.max(z))
cbar = fig.colorbar(cm.ScalarMappable(norm = norm), ax=ax)
cbar.ax.set_ylabel('Density')
return ax
if "__main__" == __name__ :
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
density_scatter( x, y, bins = [30,30] )

You could make a histogram:
import numpy as np
import matplotlib.pyplot as plt
# fake data:
a = np.random.normal(size=1000)
b = a*3 + np.random.normal(size=1000)
plt.hist2d(a, b, (50, 50), cmap=plt.cm.jet)
plt.colorbar()

Python/Matplotlib: 2d random walk with kde joint density contour in a 3d plot

I'm struggling with creating a quite complex 3d figure in python, specifically using iPython notebook. I can partition the content of the graph into two sections:
The (x,y) plane: Here a two-dimensional random walk is bobbing around, let's call it G(). I would like to plot part of this trajectory on the (x,y) plane. Say, 10% of all the data points of G(). As G() bobs around, it visits some (x,y) pairs more frequently than others. I would like to estimate this density of G() using a kernel estimation approach and draw it as contour lines on the (x,y) plane.
The (z) plane: Here, I would like to draw a mesh or (transparent) surface plot of the information theoretical surprise of a bivariate normal. Surprise is simply -log(p(i)) or the negative (base 2) logarithm of outcome i. Given the bivariate normal, each (x,y) pair has some probability p(x,y) and the surprise of this is simply -log(p(x,y)).
Essentially these two graphs are independent. Assume the interval of the random walk G() is [xmin,xmax],[ymin,ymax] and of size N. The bivariate normal in the z-plane should be drawn from the same interval, such that for each (x,y) pair in the random walk, I can draw a (dashed) line from some subset of the random walk n < N to the bivariate normal. Assume that G(10) = (5,5) then I would like to draw a dashed line from (5,5) up the Z-axes, until it hits the bivariate normal.
So far, I've managed to plot G() in a 3-d space, and estimate the density f(X,Y) using scipy.stats.gaussian_kde. In another (2d) graph, I have the sort of contour lines I want. What I don't have, is the contour lines in the 3d-plot using the estimated KDE density. I also don't have the bivariate normal plot, or the projection of a few random points from the random walk, to the surface of the bivariate normal. I've added a hand drawn figure, which might ease intuition (ignore the label on the z-axis and the fact that there is no mesh.. difficult to draw!)
Any input, even just partial, such as how to draw the contour lines in the (x,y) plane of the 3d graph, or a mesh of a bivariate normal would be much appreciated.
Thanks!
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import seaborn as sns
import scipy
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
def randomwalk():
mpl.rcParams['legend.fontsize'] = 10
xyz = []
cur = [0, 0]
for _ in range(400):
axis = random.randrange(0, 2)
cur[axis] += random.choice([-1, 1])
xyz.append(cur[:])
x, y = zip(*xyz)
data = np.vstack([x,y])
kde = scipy.stats.gaussian_kde(data)
density = kde(data)
fig1 = plt.figure()
ax = fig1.gca(projection='3d')
ax.plot(x, y, label='Random walk')
sns.kdeplot(data[0,:], data[1,:], 0)
ax.scatter(x[-1], y[-1], c='b', marker='o') # End point
ax.legend()
fig2 = plt.figure()
sns.kdeplot(data[0,:], data[1,:])
Calling randomwalk() initialises and plots this:
Edit #1:
Made some progress, actually the only thing I need is to restrict the height of the dashed vertical lines to the bivariate. Any ideas?
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import seaborn as sns
import scipy
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.mlab import bivariate_normal
%matplotlib inline
# Data for random walk
def randomwalk():
mpl.rcParams['legend.fontsize'] = 10
xyz = []
cur = [0, 0]
for _ in range(40):
axis = random.randrange(0, 2)
cur[axis] += random.choice([-1, 1])
xyz.append(cur[:])
# Get density
x, y = zip(*xyz)
data = np.vstack([x,y])
kde = scipy.stats.gaussian_kde(data)
density = kde(data)
# Data for bivariate gaussian
a = np.linspace(-7.5, 7.5, 20)
b = a
X,Y = np.meshgrid(a, b)
Z = bivariate_normal(X, Y)
surprise_Z = -np.log(Z)
# Get random points from walker and plot up z-axis to the gaussian
M = data[:,np.random.choice(20,5)].T
# Plot figure
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')
ax.plot(x, y, 'grey', label='Random walk') # Walker
ax.scatter(x[-1], y[-1], c='k', marker='o') # End point
ax.legend()
surf = ax.plot_surface(X, Y, surprise_Z, rstride=1, cstride=1,
cmap = plt.cm.gist_heat_r, alpha=0.1, linewidth=0.1)
#fig.colorbar(surf, shrink=0.5, aspect=7, cmap=plt.cm.gray_r)
for i in range(5):
ax.plot([M[i,0], M[i,0]],[M[i,1], M[i,1]], [0,10],'k--',alpha=0.8, linewidth=0.5)
ax.set_zlim(0, 50)
ax.set_xlim(-10, 10)
ax.set_ylim(-10, 10)

Final code,
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import seaborn as sns
import scipy
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.mlab import bivariate_normal
%matplotlib inline
# Data for random walk
def randomwalk():
mpl.rcParams['legend.fontsize'] = 10
xyz = []
cur = [0, 0]
for _ in range(50):
axis = random.randrange(0, 2)
cur[axis] += random.choice([-1, 1])
xyz.append(cur[:])
# Get density
x, y = zip(*xyz)
data = np.vstack([x,y])
kde = scipy.stats.gaussian_kde(data)
density = kde(data)
# Data for bivariate gaussian
a = np.linspace(-7.5, 7.5, 100)
b = a
X,Y = np.meshgrid(a, b)
Z = bivariate_normal(X, Y)
surprise_Z = -np.log(Z)
# Get random points from walker and plot up z-axis to the gaussian
M = data[:,np.random.choice(50,10)].T
# Plot figure
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')
ax.plot(x, y, 'grey', label='Random walk') # Walker
ax.legend()
surf = ax.plot_surface(X, Y, surprise_Z, rstride=1, cstride=1,
cmap = plt.cm.gist_heat_r, alpha=0.1, linewidth=0.1)
#fig.colorbar(surf, shrink=0.5, aspect=7, cmap=plt.cm.gray_r)
for i in range(10):
x = [M[i,0], M[i,0]]
y = [M[i,1], M[i,1]]
z = [0,-np.log(bivariate_normal(M[i,0],M[i,1]))]
ax.plot(x,y,z,'k--',alpha=0.8, linewidth=0.5)
ax.scatter(x, y, z, c='k', marker='o')

how to rotate a 3D surface in matplotlib

I have written code to plot a 3D surface of a parabaloid in matplotlib.
How would I rotate the figure so that the figure remains in place (i.e. no vertical or horizontal shifts) however it rotates around the line y = 0 and z = 0 through an angle of theta ( I have highlighted the line about which the figure should rotate in green). Here is an illustration to help visualize what I am describing:
For example, If the figure were rotated about the line through an angle of 180 degrees then this would result in the figure being flipped 'upside down' so that the point at the origin would be now be the maximum point.
I would also like to rotate the axis so that the colormap is maintained.
Here is the code for drawing the figure:
#parabaloid
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
#creating grid
y = np.linspace(-1,1,1000)
x = np.linspace(-1,1,1000)
x,y = np.meshgrid(x,y)
#set z values
z = x**2+y**2
#label axes
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
#plot figure
ax.plot_surface(x,y,z,linewidth=0, antialiased=False, shade = True, alpha = 0.5)
plt.show()

Something like this?
ax.view_init(-140, 30)
Insert it just before your plt.show() command.

Following my comment:
import mayavi.mlab as mlab
import numpy as np
x,y = np.mgrid[-1:1:0.001, -1:1:0.001]
z = x**2+y**2
s = mlab.mesh(x, y, z)
alpha = 30 # degrees
mlab.view(azimuth=0, elevation=90, roll=-90+alpha)
mlab.show()
or following #Tamas answer:
#parabaloid
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from math import sin, cos, pi
import matplotlib.cm as cm
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
#creating grid
y = np.linspace(-1,1,200)
x = np.linspace(-1,1,200)
x,y = np.meshgrid(x,y)
#set z values
z0 = x**2+y**2
# rotate the samples by pi / 4 radians around y
a = pi / 4
t = np.transpose(np.array([x,y,z0]), (1,2,0))
m = [[cos(a), 0, sin(a)],[0,1,0],[-sin(a), 0, cos(a)]]
x,y,z = np.transpose(np.dot(t, m), (2,0,1))
# or `np.dot(t, m)` instead `t # m`
#label axes
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
#plot figure
ax.plot_surface(x,y,z,linewidth=0, antialiased=False, shade = True, alpha = 0.5, facecolors=cm.viridis(z0))
plt.show()

The best I could come up with is to rotate the data itself.
#parabaloid
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from math import sin, cos, pi
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
#creating grid
y = np.linspace(-1,1,200)
x = np.linspace(-1,1,200)
x,y = np.meshgrid(x,y)
#set z values
z = x**2+y**2
# rotate the samples by pi / 4 radians around y
a = pi / 4
t = np.transpose(np.array([x,y,z]), (1,2,0))
m = [[cos(a), 0, sin(a)],[0,1,0],[-sin(a), 0, cos(a)]]
x,y,z = np.transpose(t # m, (2,0,1))
# or `np.dot(t, m)` instead `t # m`
#label axes
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
#plot figure
ax.plot_surface(x,y,z,linewidth=0, antialiased=False, shade = True, alpha = 0.5)
plt.show()

I can't seem to add a comment just yet but I wanted to make an amendment to Tamas' implementation. There is an issue where the surface is not rotated counter-clockwise to the axis (the y-axis in this case) where the y-axis is coming out of the page. Rather, it's rotated clockwise.
In order to rectify this, and to make it more straightforward, I construct the x, y and z grids and reshape them into straightforward lists on which we perform the rotation. Then I reshape them into grids in order to use the plot_surface() function:
import numpy as np
from matplotlib import pyplot as plt
from math import sin, cos, pi
import matplotlib.cm as cm
num_steps = 50
# Creating grid
y = np.linspace(-1,1,num_steps)
x = np.linspace(-1,1,num_steps)
x,y = np.meshgrid(x,y)
# Set z values
z = x**2+y**2
# Work with lists
x = x.reshape((-1))
y = y.reshape((-1))
z = z.reshape((-1))
# Rotate the samples by pi / 4 radians around y
a = pi / 4
t = np.array([x, y, z])
m = [[cos(a), 0, sin(a)],[0,1,0],[-sin(a), 0, cos(a)]]
x, y, z = np.dot(m, t)
ax = plt.axes(projection='3d')
# Label axes
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
# Plot the surface view it with y-axis coming out of the page.
ax.view_init(30, 90)
# Plot the surface.
ax.plot_surface(x.reshape(num_steps,num_steps), y.reshape(num_steps,num_steps), z.reshape(num_steps,num_steps));

here is the best solution:
- First, you have to perform your python script in the Spyder environment which is easy to get by downloading Anaconda. Once you perform your script in Spyder, all you have to do is to follow the next instructions:
Click on “Tools”.
Click on “Preferences”.
Click on “IPython console”.
Click on “Graphics”.
Here you’ll find an option called “Backend”, you have to change it from “Inline” to “Automaticlly”.
Finally, apply the performed changes, then Click on “OK”, and reset spyder!!!!.
Once you perform the prior steps, in theory, if you run your script, then the graphics created will appear in a different windows and you could interact with them through zooming and panning. In the case of 3d plots (3d surface) you will be able to orbit it.

How can I make a scatter plot colored by density in matplotlib?

I'd like to make a scatter plot where each point is colored by the spatial density of nearby points.
I've come across a very similar question, which shows an example of this using R:
R Scatter Plot: symbol color represents number of overlapping points
What's the best way to accomplish something similar in python using matplotlib?

In addition to hist2d or hexbin as #askewchan suggested, you can use the same method that the accepted answer in the question you linked to uses.
If you want to do that:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=100)
plt.show()
If you'd like the points to be plotted in order of density so that the densest points are always on top (similar to the linked example), just sort them by the z-values. I'm also going to use a smaller marker size here as it looks a bit better:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=50)
plt.show()

Plotting >100k data points?
The accepted answer, using gaussian_kde() will take a lot of time. On my machine, 100k rows took about 11 minutes. Here I will add two alternative methods (mpl-scatter-density and datashader) and compare the given answers with same dataset.
In the following, I used a test data set of 100k rows:
import matplotlib.pyplot as plt
import numpy as np
# Fake data for testing
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
Output & computation time comparison
Below is a comparison of different methods.
1: mpl-scatter-density
Installation
pip install mpl-scatter-density
Example code
import mpl_scatter_density # adds projection='scatter_density'
from matplotlib.colors import LinearSegmentedColormap
# "Viridis-like" colormap with white background
white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
(0, '#ffffff'),
(1e-20, '#440053'),
(0.2, '#404388'),
(0.4, '#2a788e'),
(0.6, '#21a784'),
(0.8, '#78d151'),
(1, '#fde624'),
], N=256)
def using_mpl_scatter_density(fig, x, y):
ax = fig.add_subplot(1, 1, 1, projection='scatter_density')
density = ax.scatter_density(x, y, cmap=white_viridis)
fig.colorbar(density, label='Number of points per pixel')
fig = plt.figure()
using_mpl_scatter_density(fig, x, y)
plt.show()
Drawing this took 0.05 seconds:
And the zoom-in looks quite nice:
2: datashader
Datashader is an interesting project. It has added support for matplotlib in datashader 0.12.
Installation
pip install datashader
Code (source & parameterer listing for dsshow):
import datashader as ds
from datashader.mpl_ext import dsshow
import pandas as pd
def using_datashader(ax, x, y):
df = pd.DataFrame(dict(x=x, y=y))
dsartist = dsshow(
df,
ds.Point("x", "y"),
ds.count(),
vmin=0,
vmax=35,
norm="linear",
aspect="auto",
ax=ax,
)
plt.colorbar(dsartist)
fig, ax = plt.subplots()
using_datashader(ax, x, y)
plt.show()
It took 0.83 s to draw this:
There is also possibility to colorize by third variable. The third parameter for dsshow controls the coloring. See more examples here and the source for dsshow here.
3: scatter_with_gaussian_kde
def scatter_with_gaussian_kde(ax, x, y):
# https://stackoverflow.com/a/20107592/3015186
# Answer by Joel Kington
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
ax.scatter(x, y, c=z, s=100, edgecolor='')
It took 11 minutes to draw this:
4: using_hist2d
import matplotlib.pyplot as plt
def using_hist2d(ax, x, y, bins=(50, 50)):
# https://stackoverflow.com/a/20105673/3015186
# Answer by askewchan
ax.hist2d(x, y, bins, cmap=plt.cm.jet)
It took 0.021 s to draw this bins=(50,50):
It took 0.173 s to draw this bins=(1000,1000):
Cons: The zoomed-in data does not look as good as in with mpl-scatter-density or datashader. Also you have to determine the number of bins yourself.
5: density_scatter
The code is as in the answer by Guillaume.
It took 0.073 s to draw this with bins=(50,50):
It took 0.368 s to draw this with bins=(1000,1000):

Also, if the number of point makes KDE calculation too slow, color can be interpolated in np.histogram2d [Update in response to comments: If you wish to show the colorbar, use plt.scatter() instead of ax.scatter() followed by plt.colorbar()]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import interpn
def density_scatter( x , y, ax = None, sort = True, bins = 20, **kwargs ) :
"""
Scatter plot colored by 2d histogram
"""
if ax is None :
fig , ax = plt.subplots()
data , x_e, y_e = np.histogram2d( x, y, bins = bins, density = True )
z = interpn( ( 0.5*(x_e[1:] + x_e[:-1]) , 0.5*(y_e[1:]+y_e[:-1]) ) , data , np.vstack([x,y]).T , method = "splinef2d", bounds_error = False)
#To be sure to plot all data
z[np.where(np.isnan(z))] = 0.0
# Sort the points by density, so that the densest points are plotted last
if sort :
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
ax.scatter( x, y, c=z, **kwargs )
norm = Normalize(vmin = np.min(z), vmax = np.max(z))
cbar = fig.colorbar(cm.ScalarMappable(norm = norm), ax=ax)
cbar.ax.set_ylabel('Density')
return ax
if "__main__" == __name__ :
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
density_scatter( x, y, bins = [30,30] )

You could make a histogram:
import numpy as np
import matplotlib.pyplot as plt
# fake data:
a = np.random.normal(size=1000)
b = a*3 + np.random.normal(size=1000)
plt.hist2d(a, b, (50, 50), cmap=plt.cm.jet)
plt.colorbar()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python, Matplotlib: Drawing vertical lines in 3d plot, when data is independent - python

Related

How to create a multi-colored curve in 3d?

Plotting scatter density plots in python [duplicate]

Python/Matplotlib: 2d random walk with kde joint density contour in a 3d plot

how to rotate a 3D surface in matplotlib

How can I make a scatter plot colored by density in matplotlib?

Categories

Resources