Location of contour lines - python

Below is my code and plot:
import matplotlib
import matplotlib.mlab as mlab
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
delta = 0.00025
A=0
x = np.arange(0, 0.10, delta)
y = np.arange(0, 0.1, delta)
X, Y = np.meshgrid(x, y)
Z = A*(X**2+Y**2)+2*X*Y
manual_locations = [(0.1,0.1), (0.2,0.2), (0.3,0.3),
(0.015, 0.015), (0.00255, 0.0025), (0.00005,0.00005)]
line_widths = (1, 1, 1, 1, 1, 1)
plt.figure()
CS = plt.contour(X, Y, Z, 6, # add 6 contour lines
linewidths=line_widths, # line widths
colors = line_colours) # line colours
plt.clabel(CS, inline=1, # add labels
fontsize=10, # label font size
manual=manual_locations) # label locations
plt.title('Indifference Map') # title
plt.show()
It seems my manual_locations does nothing, python picks equally spaced contour lines automatically. While I want to investigate more details around 0. How can I see more curves/contour lines converging to (0,0)?
Thanks in advance.

The easiest way to explore parts of your data in more details is with levels. This sets what Z-values to examine, and in your question you phrase this as an (x,y) location to inspect, but it's a bit backwards from how contour works to specify the location points directly.
You could also do inspect the (0,0) region by changing the boundaries of the plot appropriately.
Below, I use log values for levels, but linear values work equally well, are far more common, and are easier to interpret. The log values just easily emphasis the part of the plot you're most interested in.
import matplotlib
import matplotlib.mlab as mlab
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
#%matplotlib inline
delta = 0.00025
A=0
x = np.arange(0, 0.10, delta)
y = np.arange(0, 0.1, delta)
X, Y = np.meshgrid(x, y)
Z = A*(X**2+Y**2)+2*X*Y
manual_locations = [(0.1,0.1), (0.2,0.2), (0.3,0.3),
(0.015, 0.015), (0.00255, 0.0025), (0.00005,0.00005)]
line_widths = (1, 1, 1, 1, 1, 1)
plt.figure()
CS = plt.contour(X, Y, Z, 6, # add 6 contour lines
linewidths=line_widths,
#levels=np.linspace(0, .003, 20))
levels=np.logspace(-5, -2, 20))
plt.clabel(CS, inline=1, # add labels
fontsize=10,
fmt="%.5f")
plt.title('Indifference Map') # title
plt.show()
If you really need the contour at a specific location, you could put the (x,y) values for that location into your equation to calculate the z-value at that location, and then use this value as one of the values in the levels argument.

Related

plot_trisurface with custom color array

I basically want to "imshow" the pdf of a three-dimensional Dirichlet distribution on its support. Function simplex below computes regular points on that support, which are stored in the array sim. The array pdf holds a scalar density for each row in sim.
First thing I thought of was to use a triangulation. However, the color argument of plot_trisurface supports only one single color for all triangles. Setting cmap colors the triangles based on the z-coordinate values (See Fig. 1). Also plot_trisurface ignores the facecolors kwarg. What I want, however, is to color the surface based on pdf.
As a workaround I found, that I could interpolated the surface as 3d scatter plot. This generally gives the desired visualization, yet I ist clearly visible that it's a scatter plot; especially on the borders. (See Fig 2.)
Is there a way to plot the projection of the pdf onto the simplex?
import itertools
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
def simplex(n_vals):
base = np.linspace(0, 1, n_vals, endpoint=False)
coords = np.asarray(list(itertools.product(base, repeat=3)))
return coords[np.isclose(coords.sum(axis=-1), 1.0)]
sim = simplex(20)
pdf = stats.dirichlet([1.1, 1.5, 1.3]).pdf(sim.T)
fig1 = plt.figure()
ax1 = fig1.add_subplot(1, 2, 1, projection='3d', azim=20)
ax2 = fig1.add_subplot(1, 2, 2, projection='3d', azim=20)
ax1.plot_trisurf(x, y, z, color='k')
ax2.plot_trisurf(x, y, z, cmap='Spectral')
fig2 = plt.figure()
ax21 = fig2.add_subplot(projection='3d', azim=20)
ax21.scatter3D(*sim.T, s=50, alpha=.5, c=pdf, cmap='Spectral')
This is a way to do so by coloring each triangle in a triangulation object with the right color. Is this what you were looking for? The only thing is that each patch has a uniform color which make the patches somewhat visible.
# Setup is the same
import itertools
import matplotlib.pyplot as plt
from pylab import get_cmap
from matplotlib.tri import Triangulation, LinearTriInterpolator
import numpy as np
from scipy import stats
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
def simplex(n_vals):
base = np.linspace(0, 1, n_vals, endpoint=False)
coords = np.asarray(list(itertools.product(base, repeat=3)))
return coords[np.isclose(coords.sum(axis=-1), 1.0)]
sim = simplex(20)
pdf = stats.dirichlet([1.1, 1.5, 1.3]).pdf(sim.T)
# For shorter notation we define x, y and z:
x = sim[:, 0]
y = sim[:, 1]
z = sim[:, 2]
# Creating a triangulation object and using it to extract the actual triangles.
# Note if it is necessary that no patch will be vertical (i.e. along the z direction)
tri = Triangulation(x, y)
triangle_vertices = np.array([np.array([[x[T[0]], y[T[0]], z[T[0]]],
[x[T[1]], y[T[1]], z[T[1]]],
[x[T[2]], y[T[2]], z[T[2]]]]) for T in tri.triangles])
# Finding coordinate for the midpoints of each triangle.
# This will be used to extract the color
midpoints = np.average(triangle_vertices, axis = 1)
midx = midpoints[:, 0]
midy = midpoints[:, 1]
# Interpolating the pdf and using it with the selected cmap to produce the color RGB vector for each face.
# Some roundoff and normalization are needed
face_color_function = LinearTriInterpolator(tri, pdf)
face_color_index = face_color_function(midx, midy)
face_color_index[face_color_index < 0] = 0
face_color_index /= np.max(pdf)
cmap = get_cmap('Spectral')
# Creating the patches and plotting
collection = Poly3DCollection(triangle_vertices, facecolors=cmap(face_color_index), edgecolors=None)
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.add_collection(collection)
plt.show()
Obviously increasing the resolution would make the plot smoother.
This figure, complete with a colorbar,
was produced by the following script — the function map_colors, defined at the end of the script, could interest the general reader.
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from itertools import product as Π
# the distribution that we want to study
dirichlet = stats.dirichlet([1.1, 1.5, 1.3])
# generate the "mesh"
N = 30 # no. of triangles along an edge
s = np.linspace(0, 1, N+1)
x, y, z = np.array([(x,y,1-x-y) for x,y in Π(s,s) if x+y<1+1E-6]).T
# plot as usual
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d', azim=20)
p3dc = ax.plot_trisurf(x, y, z)
########## change the face colors ####################
mappable = map_colors(p3dc, dirichlet.pdf, 'Spectral')
# ####################################################
# possibly add a colormap
plt.colorbar(mappable, shrink=0.67, aspect=16.7)
# we are done
plt.show()
def map_colors(p3dc, func, cmap='viridis'):
"""
Color a tri-mesh according to a function evaluated in each barycentre.
p3dc: a Poly3DCollection, as returned e.g. by ax.plot_trisurf
func: a single-valued function of 3 arrays: x, y, z
cmap: a colormap NAME, as a string
Returns a ScalarMappable that can be used to instantiate a colorbar.
"""
from matplotlib.cm import ScalarMappable, get_cmap
from matplotlib.colors import Normalize
from numpy import array
# reconstruct the triangles from internal data
x, y, z, _ = p3dc._vec
slices = p3dc._segslices
triangles = array([array((x[s],y[s],z[s])).T for s in slices])
# compute the barycentres for each triangle
xb, yb, zb = triangles.mean(axis=1).T
# compute the function in the barycentres
values = func(xb, yb, zb)
# usual stuff
norm = Normalize()
colors = get_cmap(cmap)(norm(values))
# set the face colors of the Poly3DCollection
p3dc.set_fc(colors)
# if the caller wants a colorbar, they need this
return ScalarMappable(cmap=cmap, norm=norm)

Plotting scatter density plots in python [duplicate]

I'd like to make a scatter plot where each point is colored by the spatial density of nearby points.
I've come across a very similar question, which shows an example of this using R:
R Scatter Plot: symbol color represents number of overlapping points
What's the best way to accomplish something similar in python using matplotlib?
In addition to hist2d or hexbin as #askewchan suggested, you can use the same method that the accepted answer in the question you linked to uses.
If you want to do that:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=100)
plt.show()
If you'd like the points to be plotted in order of density so that the densest points are always on top (similar to the linked example), just sort them by the z-values. I'm also going to use a smaller marker size here as it looks a bit better:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=50)
plt.show()
Plotting >100k data points?
The accepted answer, using gaussian_kde() will take a lot of time. On my machine, 100k rows took about 11 minutes. Here I will add two alternative methods (mpl-scatter-density and datashader) and compare the given answers with same dataset.
In the following, I used a test data set of 100k rows:
import matplotlib.pyplot as plt
import numpy as np
# Fake data for testing
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
Output & computation time comparison
Below is a comparison of different methods.
1: mpl-scatter-density
Installation
pip install mpl-scatter-density
Example code
import mpl_scatter_density # adds projection='scatter_density'
from matplotlib.colors import LinearSegmentedColormap
# "Viridis-like" colormap with white background
white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
(0, '#ffffff'),
(1e-20, '#440053'),
(0.2, '#404388'),
(0.4, '#2a788e'),
(0.6, '#21a784'),
(0.8, '#78d151'),
(1, '#fde624'),
], N=256)
def using_mpl_scatter_density(fig, x, y):
ax = fig.add_subplot(1, 1, 1, projection='scatter_density')
density = ax.scatter_density(x, y, cmap=white_viridis)
fig.colorbar(density, label='Number of points per pixel')
fig = plt.figure()
using_mpl_scatter_density(fig, x, y)
plt.show()
Drawing this took 0.05 seconds:
And the zoom-in looks quite nice:
2: datashader
Datashader is an interesting project. It has added support for matplotlib in datashader 0.12.
Installation
pip install datashader
Code (source & parameterer listing for dsshow):
import datashader as ds
from datashader.mpl_ext import dsshow
import pandas as pd
def using_datashader(ax, x, y):
df = pd.DataFrame(dict(x=x, y=y))
dsartist = dsshow(
df,
ds.Point("x", "y"),
ds.count(),
vmin=0,
vmax=35,
norm="linear",
aspect="auto",
ax=ax,
)
plt.colorbar(dsartist)
fig, ax = plt.subplots()
using_datashader(ax, x, y)
plt.show()
It took 0.83 s to draw this:
There is also possibility to colorize by third variable. The third parameter for dsshow controls the coloring. See more examples here and the source for dsshow here.
3: scatter_with_gaussian_kde
def scatter_with_gaussian_kde(ax, x, y):
# https://stackoverflow.com/a/20107592/3015186
# Answer by Joel Kington
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
ax.scatter(x, y, c=z, s=100, edgecolor='')
It took 11 minutes to draw this:
4: using_hist2d
import matplotlib.pyplot as plt
def using_hist2d(ax, x, y, bins=(50, 50)):
# https://stackoverflow.com/a/20105673/3015186
# Answer by askewchan
ax.hist2d(x, y, bins, cmap=plt.cm.jet)
It took 0.021 s to draw this bins=(50,50):
It took 0.173 s to draw this bins=(1000,1000):
Cons: The zoomed-in data does not look as good as in with mpl-scatter-density or datashader. Also you have to determine the number of bins yourself.
5: density_scatter
The code is as in the answer by Guillaume.
It took 0.073 s to draw this with bins=(50,50):
It took 0.368 s to draw this with bins=(1000,1000):
Also, if the number of point makes KDE calculation too slow, color can be interpolated in np.histogram2d [Update in response to comments: If you wish to show the colorbar, use plt.scatter() instead of ax.scatter() followed by plt.colorbar()]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import interpn
def density_scatter( x , y, ax = None, sort = True, bins = 20, **kwargs ) :
"""
Scatter plot colored by 2d histogram
"""
if ax is None :
fig , ax = plt.subplots()
data , x_e, y_e = np.histogram2d( x, y, bins = bins, density = True )
z = interpn( ( 0.5*(x_e[1:] + x_e[:-1]) , 0.5*(y_e[1:]+y_e[:-1]) ) , data , np.vstack([x,y]).T , method = "splinef2d", bounds_error = False)
#To be sure to plot all data
z[np.where(np.isnan(z))] = 0.0
# Sort the points by density, so that the densest points are plotted last
if sort :
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
ax.scatter( x, y, c=z, **kwargs )
norm = Normalize(vmin = np.min(z), vmax = np.max(z))
cbar = fig.colorbar(cm.ScalarMappable(norm = norm), ax=ax)
cbar.ax.set_ylabel('Density')
return ax
if "__main__" == __name__ :
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
density_scatter( x, y, bins = [30,30] )
You could make a histogram:
import numpy as np
import matplotlib.pyplot as plt
# fake data:
a = np.random.normal(size=1000)
b = a*3 + np.random.normal(size=1000)
plt.hist2d(a, b, (50, 50), cmap=plt.cm.jet)
plt.colorbar()

Python/Matplotlib: 2d random walk with kde joint density contour in a 3d plot

I'm struggling with creating a quite complex 3d figure in python, specifically using iPython notebook. I can partition the content of the graph into two sections:
The (x,y) plane: Here a two-dimensional random walk is bobbing around, let's call it G(). I would like to plot part of this trajectory on the (x,y) plane. Say, 10% of all the data points of G(). As G() bobs around, it visits some (x,y) pairs more frequently than others. I would like to estimate this density of G() using a kernel estimation approach and draw it as contour lines on the (x,y) plane.
The (z) plane: Here, I would like to draw a mesh or (transparent) surface plot of the information theoretical surprise of a bivariate normal. Surprise is simply -log(p(i)) or the negative (base 2) logarithm of outcome i. Given the bivariate normal, each (x,y) pair has some probability p(x,y) and the surprise of this is simply -log(p(x,y)).
Essentially these two graphs are independent. Assume the interval of the random walk G() is [xmin,xmax],[ymin,ymax] and of size N. The bivariate normal in the z-plane should be drawn from the same interval, such that for each (x,y) pair in the random walk, I can draw a (dashed) line from some subset of the random walk n < N to the bivariate normal. Assume that G(10) = (5,5) then I would like to draw a dashed line from (5,5) up the Z-axes, until it hits the bivariate normal.
So far, I've managed to plot G() in a 3-d space, and estimate the density f(X,Y) using scipy.stats.gaussian_kde. In another (2d) graph, I have the sort of contour lines I want. What I don't have, is the contour lines in the 3d-plot using the estimated KDE density. I also don't have the bivariate normal plot, or the projection of a few random points from the random walk, to the surface of the bivariate normal. I've added a hand drawn figure, which might ease intuition (ignore the label on the z-axis and the fact that there is no mesh.. difficult to draw!)
Any input, even just partial, such as how to draw the contour lines in the (x,y) plane of the 3d graph, or a mesh of a bivariate normal would be much appreciated.
Thanks!
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import seaborn as sns
import scipy
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
def randomwalk():
mpl.rcParams['legend.fontsize'] = 10
xyz = []
cur = [0, 0]
for _ in range(400):
axis = random.randrange(0, 2)
cur[axis] += random.choice([-1, 1])
xyz.append(cur[:])
x, y = zip(*xyz)
data = np.vstack([x,y])
kde = scipy.stats.gaussian_kde(data)
density = kde(data)
fig1 = plt.figure()
ax = fig1.gca(projection='3d')
ax.plot(x, y, label='Random walk')
sns.kdeplot(data[0,:], data[1,:], 0)
ax.scatter(x[-1], y[-1], c='b', marker='o') # End point
ax.legend()
fig2 = plt.figure()
sns.kdeplot(data[0,:], data[1,:])
Calling randomwalk() initialises and plots this:
Edit #1:
Made some progress, actually the only thing I need is to restrict the height of the dashed vertical lines to the bivariate. Any ideas?
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import seaborn as sns
import scipy
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.mlab import bivariate_normal
%matplotlib inline
# Data for random walk
def randomwalk():
mpl.rcParams['legend.fontsize'] = 10
xyz = []
cur = [0, 0]
for _ in range(40):
axis = random.randrange(0, 2)
cur[axis] += random.choice([-1, 1])
xyz.append(cur[:])
# Get density
x, y = zip(*xyz)
data = np.vstack([x,y])
kde = scipy.stats.gaussian_kde(data)
density = kde(data)
# Data for bivariate gaussian
a = np.linspace(-7.5, 7.5, 20)
b = a
X,Y = np.meshgrid(a, b)
Z = bivariate_normal(X, Y)
surprise_Z = -np.log(Z)
# Get random points from walker and plot up z-axis to the gaussian
M = data[:,np.random.choice(20,5)].T
# Plot figure
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')
ax.plot(x, y, 'grey', label='Random walk') # Walker
ax.scatter(x[-1], y[-1], c='k', marker='o') # End point
ax.legend()
surf = ax.plot_surface(X, Y, surprise_Z, rstride=1, cstride=1,
cmap = plt.cm.gist_heat_r, alpha=0.1, linewidth=0.1)
#fig.colorbar(surf, shrink=0.5, aspect=7, cmap=plt.cm.gray_r)
for i in range(5):
ax.plot([M[i,0], M[i,0]],[M[i,1], M[i,1]], [0,10],'k--',alpha=0.8, linewidth=0.5)
ax.set_zlim(0, 50)
ax.set_xlim(-10, 10)
ax.set_ylim(-10, 10)
Final code,
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import seaborn as sns
import scipy
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.mlab import bivariate_normal
%matplotlib inline
# Data for random walk
def randomwalk():
mpl.rcParams['legend.fontsize'] = 10
xyz = []
cur = [0, 0]
for _ in range(50):
axis = random.randrange(0, 2)
cur[axis] += random.choice([-1, 1])
xyz.append(cur[:])
# Get density
x, y = zip(*xyz)
data = np.vstack([x,y])
kde = scipy.stats.gaussian_kde(data)
density = kde(data)
# Data for bivariate gaussian
a = np.linspace(-7.5, 7.5, 100)
b = a
X,Y = np.meshgrid(a, b)
Z = bivariate_normal(X, Y)
surprise_Z = -np.log(Z)
# Get random points from walker and plot up z-axis to the gaussian
M = data[:,np.random.choice(50,10)].T
# Plot figure
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')
ax.plot(x, y, 'grey', label='Random walk') # Walker
ax.legend()
surf = ax.plot_surface(X, Y, surprise_Z, rstride=1, cstride=1,
cmap = plt.cm.gist_heat_r, alpha=0.1, linewidth=0.1)
#fig.colorbar(surf, shrink=0.5, aspect=7, cmap=plt.cm.gray_r)
for i in range(10):
x = [M[i,0], M[i,0]]
y = [M[i,1], M[i,1]]
z = [0,-np.log(bivariate_normal(M[i,0],M[i,1]))]
ax.plot(x,y,z,'k--',alpha=0.8, linewidth=0.5)
ax.scatter(x, y, z, c='k', marker='o')

How can I make a scatter plot colored by density in matplotlib?

I'd like to make a scatter plot where each point is colored by the spatial density of nearby points.
I've come across a very similar question, which shows an example of this using R:
R Scatter Plot: symbol color represents number of overlapping points
What's the best way to accomplish something similar in python using matplotlib?
In addition to hist2d or hexbin as #askewchan suggested, you can use the same method that the accepted answer in the question you linked to uses.
If you want to do that:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=100)
plt.show()
If you'd like the points to be plotted in order of density so that the densest points are always on top (similar to the linked example), just sort them by the z-values. I'm also going to use a smaller marker size here as it looks a bit better:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=50)
plt.show()
Plotting >100k data points?
The accepted answer, using gaussian_kde() will take a lot of time. On my machine, 100k rows took about 11 minutes. Here I will add two alternative methods (mpl-scatter-density and datashader) and compare the given answers with same dataset.
In the following, I used a test data set of 100k rows:
import matplotlib.pyplot as plt
import numpy as np
# Fake data for testing
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
Output & computation time comparison
Below is a comparison of different methods.
1: mpl-scatter-density
Installation
pip install mpl-scatter-density
Example code
import mpl_scatter_density # adds projection='scatter_density'
from matplotlib.colors import LinearSegmentedColormap
# "Viridis-like" colormap with white background
white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
(0, '#ffffff'),
(1e-20, '#440053'),
(0.2, '#404388'),
(0.4, '#2a788e'),
(0.6, '#21a784'),
(0.8, '#78d151'),
(1, '#fde624'),
], N=256)
def using_mpl_scatter_density(fig, x, y):
ax = fig.add_subplot(1, 1, 1, projection='scatter_density')
density = ax.scatter_density(x, y, cmap=white_viridis)
fig.colorbar(density, label='Number of points per pixel')
fig = plt.figure()
using_mpl_scatter_density(fig, x, y)
plt.show()
Drawing this took 0.05 seconds:
And the zoom-in looks quite nice:
2: datashader
Datashader is an interesting project. It has added support for matplotlib in datashader 0.12.
Installation
pip install datashader
Code (source & parameterer listing for dsshow):
import datashader as ds
from datashader.mpl_ext import dsshow
import pandas as pd
def using_datashader(ax, x, y):
df = pd.DataFrame(dict(x=x, y=y))
dsartist = dsshow(
df,
ds.Point("x", "y"),
ds.count(),
vmin=0,
vmax=35,
norm="linear",
aspect="auto",
ax=ax,
)
plt.colorbar(dsartist)
fig, ax = plt.subplots()
using_datashader(ax, x, y)
plt.show()
It took 0.83 s to draw this:
There is also possibility to colorize by third variable. The third parameter for dsshow controls the coloring. See more examples here and the source for dsshow here.
3: scatter_with_gaussian_kde
def scatter_with_gaussian_kde(ax, x, y):
# https://stackoverflow.com/a/20107592/3015186
# Answer by Joel Kington
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
ax.scatter(x, y, c=z, s=100, edgecolor='')
It took 11 minutes to draw this:
4: using_hist2d
import matplotlib.pyplot as plt
def using_hist2d(ax, x, y, bins=(50, 50)):
# https://stackoverflow.com/a/20105673/3015186
# Answer by askewchan
ax.hist2d(x, y, bins, cmap=plt.cm.jet)
It took 0.021 s to draw this bins=(50,50):
It took 0.173 s to draw this bins=(1000,1000):
Cons: The zoomed-in data does not look as good as in with mpl-scatter-density or datashader. Also you have to determine the number of bins yourself.
5: density_scatter
The code is as in the answer by Guillaume.
It took 0.073 s to draw this with bins=(50,50):
It took 0.368 s to draw this with bins=(1000,1000):
Also, if the number of point makes KDE calculation too slow, color can be interpolated in np.histogram2d [Update in response to comments: If you wish to show the colorbar, use plt.scatter() instead of ax.scatter() followed by plt.colorbar()]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import interpn
def density_scatter( x , y, ax = None, sort = True, bins = 20, **kwargs ) :
"""
Scatter plot colored by 2d histogram
"""
if ax is None :
fig , ax = plt.subplots()
data , x_e, y_e = np.histogram2d( x, y, bins = bins, density = True )
z = interpn( ( 0.5*(x_e[1:] + x_e[:-1]) , 0.5*(y_e[1:]+y_e[:-1]) ) , data , np.vstack([x,y]).T , method = "splinef2d", bounds_error = False)
#To be sure to plot all data
z[np.where(np.isnan(z))] = 0.0
# Sort the points by density, so that the densest points are plotted last
if sort :
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
ax.scatter( x, y, c=z, **kwargs )
norm = Normalize(vmin = np.min(z), vmax = np.max(z))
cbar = fig.colorbar(cm.ScalarMappable(norm = norm), ax=ax)
cbar.ax.set_ylabel('Density')
return ax
if "__main__" == __name__ :
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
density_scatter( x, y, bins = [30,30] )
You could make a histogram:
import numpy as np
import matplotlib.pyplot as plt
# fake data:
a = np.random.normal(size=1000)
b = a*3 + np.random.normal(size=1000)
plt.hist2d(a, b, (50, 50), cmap=plt.cm.jet)
plt.colorbar()

Scattered x,y,z via python's matplotlib.pyplot.contourf

Most pyplot examples out there use linear data, but what if data is scattered?
x = 3,7,9
y = 1,4,5
z = 20,3,7
better meshgrid for contourf
xi = np.linspace(min(x)-1, max(x)+1, 9)
yi = np.linspace(min(y)-1, max(y)+1, 9)
X, Y = np.meshgrid(xi, yi)
Now "z" data got to be interpolated onto the meshgrid.
numpy.interp does little help here, while both linear and nn interpolaton of
zi = matplotlib.mlab.griddata(x,y,z,xi,yi,interp="linear")
returns rather strange results
scipy.interpolate.griddata cubic from second answer below needs something else to return data rather than nils
With custom levels data expected be looking something like this
This is what happens:
Although contour requires grid data, we can caste scatter data to a grid and then using masked arrays mask out the blank regions. I simulate this in the code below, by creating a random array, then using this to mask a test dataset (shown at bottom). The bulk of the code is taken from this matplotlib demo page.
import matplotlib
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
matplotlib.rcParams['xtick.direction'] = 'out'
matplotlib.rcParams['ytick.direction'] = 'out'
delta = 0.025
x = np.arange(-3.0, 3.0, delta)
y = np.arange(-2.0, 2.0, delta)
X, Y = np.meshgrid(x, y)
Z1 = mlab.bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = mlab.bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
# difference of Gaussians
Z = 10.0 * (Z2 - Z1)
from numpy.random import *
import numpy.ma as ma
J = random_sample(X.shape)
mask = J > 0.7
X = ma.masked_array(X, mask=mask)
Y = ma.masked_array(Y, mask=mask)
Z = ma.masked_array(Z, mask=mask)
plt.figure()
CS = plt.contour(X, Y, Z, 20)
plt.clabel(CS, inline=1, fontsize=10)
plt.title('Simplest default with labels')
plt.savefig('cat.png')
plt.show()
countourf will only work with a grid of data. If you're data is scattered, then you'll need to create an interpolated grid matching your data, like this: (note you'll need scipy to perform the interpolation)
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import numpy.ma as ma
from numpy.random import uniform, seed
# your data
x = [3,7,9]
y = [1,4,5]
z = [20,3,7]
# define grid.
xi = np.linspace(0,10,300)
yi = np.linspace(0,6,300)
# grid the data.
zi = griddata((x, y), z, (xi[None,:], yi[:,None]), method='cubic')
# contour the gridded data, plotting dots at the randomly spaced data points.
CS = plt.contour(xi,yi,zi,15,linewidths=0.5,colors='k')
CS = plt.contourf(xi,yi,zi,15,cmap=plt.cm.jet)
plt.colorbar() # draw colorbar
# plot data points.
plt.scatter(x,y,marker='o',c='b',s=5)
plt.xlim(min(x),max(x))
plt.ylim(min(y),max(y))
plt.title('griddata test (%d points)' % len(x))
plt.show()
See here for the origin of that code.

Categories