Can python make a tile plot? - python

Can python (eg matplotlib) make a tile plot like the following, where color indicates the intensity at each data point? Thanks!

You only need all of that machinery if you want the mouse to report back the value of the data under your mouse. To generate the image all you really need is (doc):
plt.imshow(data, interpolation='nearest')
You can control the color mapping via the cmap keyword.

Here is an example taken from http://matplotlib.org/examples/api/image_zcoord.html:
"""
Show how to modify the coordinate formatter to report the image "z"
value of the nearest pixel given x and y
"""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
X = 10*np.random.rand(5,3)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(X, cmap=cm.jet, interpolation='nearest')
numrows, numcols = X.shape
def format_coord(x, y):
col = int(x+0.5)
row = int(y+0.5)
if col>=0 and col<numcols and row>=0 and row<numrows:
z = X[row,col]
return 'x=%1.4f, y=%1.4f, z=%1.4f'%(x, y, z)
else:
return 'x=%1.4f, y=%1.4f'%(x, y)
ax.format_coord = format_coord
plt.show()

You are looking for image_zcode The example given is:
"""
Show how to modify the coordinate formatter to report the image "z"
value of the nearest pixel given x and y
"""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
X = 10*np.random.rand(5,3)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(X, cmap=cm.jet, interpolation='nearest')
numrows, numcols = X.shape
def format_coord(x, y):
col = int(x+0.5)
row = int(y+0.5)
if col>=0 and col<numcols and row>=0 and row<numrows:
z = X[row,col]
return 'x=%1.4f, y=%1.4f, z=%1.4f'%(x, y, z)
else:
return 'x=%1.4f, y=%1.4f'%(x, y)
ax.format_coord = format_coord
plt.show()

Related

Update interactive plot in Jupyter Notebook [duplicate]

I am trying to animate a pcolormesh in matplotlib. I have seen many of the examples using the package animation, most of them using a 1D plot routine, and some of them with imshow().
First, I wan to use the FuncAnimation routine. My problem is, first, that I do not know if I can initialize the plot
fig,ax = plt.subplots()
quad = ax.pcolormesh(X,Y,Z)
I have tried a few simple lines:
fig,ax = plt.subplots()
quad = ax.pcolormesh([])
def init():
quad.set_array([])
return quad,
def animate(ktime):
quad.set_array(X,Y,np.sin(Z)+ktime)
return quad,
anim = animation.FuncAnimation(fig,animate,init_func=init,frames=Ntime,interval=200,blit=True)
plt.show()
By the way, How do I set labels into and animated plot? Can I animate the title, if it is showing a number that changes in time?
Thanks
The problem was that I was wrongly using set_array() routine. It is very important to note that you must pass a 1D array to this routine. To do so, regarding that color, pcolormesh and so on usually plots multidimensional arrays, you should use .ravel() .
One more important thing: In order to animate different plots at the same time, the blitz option at animate.FuncAnimation must be False (See section "Animating selected plot elements" of this link).
Here I post the code that simple program with various subplots:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.gridspec as gridspec
import matplotlib.animation as animation
y, x = np.meshgrid(np.linspace(-10, 10,100), np.linspace(-10, 10,100))
z = np.sin(x)*np.sin(x)+np.sin(y)*np.sin(y)
v = np.linspace(-10, 10,100)
t = np.sin(v)*np.sin(v)
tt = np.cos(v)*np.cos(v)
###########
fig = plt.figure(figsize=(16, 8),facecolor='white')
gs = gridspec.GridSpec(5, 2)
ax1 = plt.subplot(gs[0,0])
line, = ax1.plot([],[],'b-.',linewidth=2)
ax1.set_xlim(-10,10)
ax1.set_ylim(0,1)
ax1.set_xlabel('time')
ax1.set_ylabel('amplitude')
ax1.set_title('Oscillationsssss')
time_text = ax1.text(0.02, 0.95, '', transform=ax1.transAxes)
#############################
ax2 = plt.subplot(gs[1:3,0])
quad1 = ax2.pcolormesh(x,y,z,shading='gouraud')
ax2.set_xlabel('time')
ax2.set_ylabel('amplitude')
cb2 = fig.colorbar(quad1,ax=ax2)
#########################
ax3 = plt.subplot(gs[3:,0])
quad2 = ax3.pcolormesh(x, y, z,shading='gouraud')
ax3.set_xlabel('time')
ax3.set_ylabel('amplitude')
cb3 = fig.colorbar(quad2,ax=ax3)
############################
ax4 = plt.subplot(gs[:,1])
line2, = ax4.plot(v,tt,'b',linewidth=2)
ax4.set_xlim(-10,10)
ax4.set_ylim(0,1)
def init():
line.set_data([],[])
line2.set_data([],[])
quad1.set_array([])
return line,line2,quad1
def animate(iter):
t = np.sin(2*v-iter/(2*np.pi))*np.sin(2*v-iter/(2*np.pi))
tt = np.cos(2*v-iter/(2*np.pi))*np.cos(2*v-iter/(2*np.pi))
z = np.sin(x-iter/(2*np.pi))*np.sin(x-iter/(2*np.pi))+np.sin(y)*np.sin(y)
line.set_data(v,t)
quad1.set_array(z.ravel())
line2.set_data(v,tt)
return line,line2,quad1
gs.tight_layout(fig)
anim = animation.FuncAnimation(fig,animate,frames=100,interval=50,blit=False,repeat=False)
plt.show()
print 'Finished!!'
There is an ugly detail you need to take care when using QuadMesh.set_array(). If you intantiate your QuadMesh with X, Y and C you can update the values C by using set_array(). But set_array does not support the same input as the constructor. Reading the source reveals that you need to pass a 1d-array and what is even more puzzling is that depending on the shading setting you might need to cut of your array C.
Edit: There is even a very old bug report about the confusing array size for shading='flat'.
That means:
Using QuadMesh.set_array() with shading = 'flat'
'flat' is default value for shading.
# preperation
import numpy as np
import matplotlib.pyplot as plt
plt.ion()
y = np.linspace(-10, 10, num=1000)
x = np.linspace(-10, 10, num=1000)
X, Y = np.meshgrid(x, y)
C = np.ones((1000, 1000)) * float('nan')
# intantiate empty plot (values = nan)
pcmesh = plt.pcolormesh(X, Y, C, vmin=-100, vmax=100, shading='flat')
# generate some new data
C = X * Y
# necessary for shading='flat'
C = C[:-1, :-1]
# ravel() converts C to a 1d-array
pcmesh.set_array(C.ravel())
# redraw to update plot with new data
plt.draw()
Looks like:
Note that if you omit C = C[:-1, :-1] your will get this broken graphic:
Using QuadMesh.set_array() with shading = 'gouraud'
# preperation (same as for 'flat')
import numpy as np
import matplotlib.pyplot as plt
plt.ion()
y = np.linspace(-10, 10, num=1000)
x = np.linspace(-10, 10, num=1000)
X, Y = np.meshgrid(x, y)
C = np.ones((1000, 1000)) * float('nan')
# intantiate empty plot (values = nan)
pcmesh = plt.pcolormesh(X, Y, C, vmin=-100, vmax=100, shading='gouraud')
# generate some new data
C = X * Y
# here no cut of of last row/column!
# ravel() converts C to a 1d-array
pcmesh.set_array(C.ravel())
# redraw to update plot with new data
plt.draw()
If you cut off the last row/column with shade='gouraud' you will get:
ValueError: total size of new array must be unchanged
I am not sure why your quad = ax.pcolormesh(X,Y,Z) function is giving an error. Can you post the error?
Below is what I would do to create a simple animation using pcolormesh:
import matplotlib.pyplot as plt
import numpy as np
y, x = np.meshgrid(np.linspace(-3, 3,100), np.linspace(-3, 3,100))
z = np.sin(x**2+y**2)
z = z[:-1, :-1]
ax = plt.subplot(111)
quad = plt.pcolormesh(x, y, z)
plt.colorbar()
plt.ion()
plt.show()
for phase in np.linspace(0,10*np.pi,200):
z = np.sin(np.sqrt(x**2+y**2) + phase)
z = z[:-1, :-1]
quad.set_array(z.ravel())
plt.title('Phase: %.2f'%phase)
plt.draw()
plt.ioff()
plt.show()
One of the frames:
Does this help? If not, maybe you can clarify the question.
There is another answer presented here that looks simpler thus better (IMHO)
Here is a copy & paste of the alternative solution :
import matplotlib.pylab as plt
from matplotlib import animation
fig = plt.figure()
plt.hold(True)
#We need to prime the pump, so to speak and create a quadmesh for plt to work with
plt.pcolormesh(X[0:1], Y[0:1], C[0:1])
anim = animation.FuncAnimation(fig, animate, frames = range(2,155), blit = False)
plt.show()
plt.hold(False)
def animate( self, i):
plt.title('Ray: %.2f'%i)
#This is where new data is inserted into the plot.
plt.pcolormesh(X[i-2:i], Y[i-2:i], C[i-2:i])

Plotting scatter density plots in python [duplicate]

I'd like to make a scatter plot where each point is colored by the spatial density of nearby points.
I've come across a very similar question, which shows an example of this using R:
R Scatter Plot: symbol color represents number of overlapping points
What's the best way to accomplish something similar in python using matplotlib?
In addition to hist2d or hexbin as #askewchan suggested, you can use the same method that the accepted answer in the question you linked to uses.
If you want to do that:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=100)
plt.show()
If you'd like the points to be plotted in order of density so that the densest points are always on top (similar to the linked example), just sort them by the z-values. I'm also going to use a smaller marker size here as it looks a bit better:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=50)
plt.show()
Plotting >100k data points?
The accepted answer, using gaussian_kde() will take a lot of time. On my machine, 100k rows took about 11 minutes. Here I will add two alternative methods (mpl-scatter-density and datashader) and compare the given answers with same dataset.
In the following, I used a test data set of 100k rows:
import matplotlib.pyplot as plt
import numpy as np
# Fake data for testing
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
Output & computation time comparison
Below is a comparison of different methods.
1: mpl-scatter-density
Installation
pip install mpl-scatter-density
Example code
import mpl_scatter_density # adds projection='scatter_density'
from matplotlib.colors import LinearSegmentedColormap
# "Viridis-like" colormap with white background
white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
(0, '#ffffff'),
(1e-20, '#440053'),
(0.2, '#404388'),
(0.4, '#2a788e'),
(0.6, '#21a784'),
(0.8, '#78d151'),
(1, '#fde624'),
], N=256)
def using_mpl_scatter_density(fig, x, y):
ax = fig.add_subplot(1, 1, 1, projection='scatter_density')
density = ax.scatter_density(x, y, cmap=white_viridis)
fig.colorbar(density, label='Number of points per pixel')
fig = plt.figure()
using_mpl_scatter_density(fig, x, y)
plt.show()
Drawing this took 0.05 seconds:
And the zoom-in looks quite nice:
2: datashader
Datashader is an interesting project. It has added support for matplotlib in datashader 0.12.
Installation
pip install datashader
Code (source & parameterer listing for dsshow):
import datashader as ds
from datashader.mpl_ext import dsshow
import pandas as pd
def using_datashader(ax, x, y):
df = pd.DataFrame(dict(x=x, y=y))
dsartist = dsshow(
df,
ds.Point("x", "y"),
ds.count(),
vmin=0,
vmax=35,
norm="linear",
aspect="auto",
ax=ax,
)
plt.colorbar(dsartist)
fig, ax = plt.subplots()
using_datashader(ax, x, y)
plt.show()
It took 0.83 s to draw this:
There is also possibility to colorize by third variable. The third parameter for dsshow controls the coloring. See more examples here and the source for dsshow here.
3: scatter_with_gaussian_kde
def scatter_with_gaussian_kde(ax, x, y):
# https://stackoverflow.com/a/20107592/3015186
# Answer by Joel Kington
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
ax.scatter(x, y, c=z, s=100, edgecolor='')
It took 11 minutes to draw this:
4: using_hist2d
import matplotlib.pyplot as plt
def using_hist2d(ax, x, y, bins=(50, 50)):
# https://stackoverflow.com/a/20105673/3015186
# Answer by askewchan
ax.hist2d(x, y, bins, cmap=plt.cm.jet)
It took 0.021 s to draw this bins=(50,50):
It took 0.173 s to draw this bins=(1000,1000):
Cons: The zoomed-in data does not look as good as in with mpl-scatter-density or datashader. Also you have to determine the number of bins yourself.
5: density_scatter
The code is as in the answer by Guillaume.
It took 0.073 s to draw this with bins=(50,50):
It took 0.368 s to draw this with bins=(1000,1000):
Also, if the number of point makes KDE calculation too slow, color can be interpolated in np.histogram2d [Update in response to comments: If you wish to show the colorbar, use plt.scatter() instead of ax.scatter() followed by plt.colorbar()]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import interpn
def density_scatter( x , y, ax = None, sort = True, bins = 20, **kwargs ) :
"""
Scatter plot colored by 2d histogram
"""
if ax is None :
fig , ax = plt.subplots()
data , x_e, y_e = np.histogram2d( x, y, bins = bins, density = True )
z = interpn( ( 0.5*(x_e[1:] + x_e[:-1]) , 0.5*(y_e[1:]+y_e[:-1]) ) , data , np.vstack([x,y]).T , method = "splinef2d", bounds_error = False)
#To be sure to plot all data
z[np.where(np.isnan(z))] = 0.0
# Sort the points by density, so that the densest points are plotted last
if sort :
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
ax.scatter( x, y, c=z, **kwargs )
norm = Normalize(vmin = np.min(z), vmax = np.max(z))
cbar = fig.colorbar(cm.ScalarMappable(norm = norm), ax=ax)
cbar.ax.set_ylabel('Density')
return ax
if "__main__" == __name__ :
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
density_scatter( x, y, bins = [30,30] )
You could make a histogram:
import numpy as np
import matplotlib.pyplot as plt
# fake data:
a = np.random.normal(size=1000)
b = a*3 + np.random.normal(size=1000)
plt.hist2d(a, b, (50, 50), cmap=plt.cm.jet)
plt.colorbar()

Plot a surface with lists of latitude, longitude and elevation data (hillshading)

I found this script on the matplotlib website:
"""
Demonstrates using custom hillshading in a 3D surface plot.
"""
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cbook
from matplotlib import cm
from matplotlib.colors import LightSource
import matplotlib.pyplot as plt
import numpy as np
filename = cbook.get_sample_data('jacksboro_fault_dem.npz', asfileobj=False)
with np.load(filename) as dem:
z = dem['elevation']
nrows, ncols = z.shape
x = np.linspace(dem['xmin'], dem['xmax'], ncols)
y = np.linspace(dem['ymin'], dem['ymax'], nrows)
x, y = np.meshgrid(x, y)
region = np.s_[5:50, 5:50]
x, y, z = x[region], y[region], z[region]
fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ls = LightSource(270, 45)
# To use a custom hillshading mode, override the built-in shading and pass
# in the rgb colors of the shaded surface calculated from "shade".
rgb = ls.shade(z, cmap=cm.gist_earth, vert_exag=0.1, blend_mode='soft')
surf = ax.plot_surface(x, y, z, rstride=1, cstride=1, facecolors=rgb,
linewidth=0, antialiased=False, shade=False)
plt.show()
They use the file jacksboro_fault_dem.npz to plot the elevation data and they get something like that:
Thanks to Google Earth I was able to get the text file maido_elevation_data.txt with latitude, longitude and elevation data of the following area (Maïdo, Reunion Island):
I made a function to get 3 lists for each coordinate from the text file:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
def get_LAT_LONG_ALT(text_file):
ch=""
LAT=[]
LONG=[]
ALT=[]
with open(text_file,"r") as fich:
for ligne in fich:
for e in ligne:
ch+=e
liste=ch.replace("\n","").split("\t")
LAT.append(float(liste[0]))
LONG.append(float(liste[1]))
ALT.append(float(liste[2]))
ch=""
return LAT,LONG,ALT
fig = plt.figure()
axes = fig.add_subplot(111, projection="3d")
X = get_LAT_LONG_ALT("maido_elevation_data.txt")[0]
Y = get_LAT_LONG_ALT("maido_elevation_data.txt")[1]
Z = get_LAT_LONG_ALT("maido_elevation_data.txt")[2]
axes.scatter(X,Y,Z, c="r", marker="o")
axes.set_xlabel("Latitude")
axes.set_ylabel("Longitude")
axes.set_zlabel("Altitude")
plt.show()
How should I modify the script to get a good surface plot with my own data like they do?
PS: I will give you the links of the files in the comments because I'm not allowed to put more than 2 links... yes, I'm new :)
You should reshape your data it is a three column data x,y and z
You should have a file with only z values in a 2D table columns are x and rows are y.
Meshgrid fucntion in python should help.

Change RGB color in matplotlib animation

I seems that it is not possible to change colors of a Matplotlib scatter plot through a RGB definition. Am I wrong?
Here is a code (already given in stack overflow) which work with colors indexed in float:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.animation as animation
def main():
numframes = 100
numpoints = 10
color_data = np.random.random((numframes, numpoints))
x, y, c = np.random.random((3, numpoints))
fig = plt.figure()
scat = plt.scatter(x, y, c=c, s=100)
ani = animation.FuncAnimation(fig, update_plot, frames=range(numframes),
fargs=(color_data, scat))
plt.show()
def update_plot(i, data, scat):
scat.set_array(data[i])
return scat,
main()
But if color_data is defined through RGB colors, I get an error:
ValueError: Collections can only map rank 1 arrays
The related code is the following (in this code, I just change the color of one sample each time):
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.animation as animation
def main():
numframes = 100
numpoints = 10
rgb_color_data = np.random.random((numpoints, 3))
x, y = np.random.random((2, numpoints))
fig = plt.figure()
scat = plt.scatter(x, y, c=rgb_color_data, s=100) #this work well at this level
ani = animation.FuncAnimation(fig, update_plot2, frames=range(numframes),
fargs=(rgb_color_data, scat))
plt.show()
def update_plot2(i,data,scat):
data[ i%10 ] = np.random.random((3))
scat.set_array(data) # this fails
return scat,
main()
Is there a means to use set_array with RGB color array?
Not sure what you are trying to achieve. But if you are trying to change the color, why not use the set_color() function of Collection?
def update_plot2(i,data,scat):
data[ i%10 ] = np.random.random((3))
scat.set_color(data) # <<<<<<<<<<<<<<<<<<<
return scat,

How can I make a scatter plot colored by density in matplotlib?

I'd like to make a scatter plot where each point is colored by the spatial density of nearby points.
I've come across a very similar question, which shows an example of this using R:
R Scatter Plot: symbol color represents number of overlapping points
What's the best way to accomplish something similar in python using matplotlib?
In addition to hist2d or hexbin as #askewchan suggested, you can use the same method that the accepted answer in the question you linked to uses.
If you want to do that:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=100)
plt.show()
If you'd like the points to be plotted in order of density so that the densest points are always on top (similar to the linked example), just sort them by the z-values. I'm also going to use a smaller marker size here as it looks a bit better:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=50)
plt.show()
Plotting >100k data points?
The accepted answer, using gaussian_kde() will take a lot of time. On my machine, 100k rows took about 11 minutes. Here I will add two alternative methods (mpl-scatter-density and datashader) and compare the given answers with same dataset.
In the following, I used a test data set of 100k rows:
import matplotlib.pyplot as plt
import numpy as np
# Fake data for testing
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
Output & computation time comparison
Below is a comparison of different methods.
1: mpl-scatter-density
Installation
pip install mpl-scatter-density
Example code
import mpl_scatter_density # adds projection='scatter_density'
from matplotlib.colors import LinearSegmentedColormap
# "Viridis-like" colormap with white background
white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
(0, '#ffffff'),
(1e-20, '#440053'),
(0.2, '#404388'),
(0.4, '#2a788e'),
(0.6, '#21a784'),
(0.8, '#78d151'),
(1, '#fde624'),
], N=256)
def using_mpl_scatter_density(fig, x, y):
ax = fig.add_subplot(1, 1, 1, projection='scatter_density')
density = ax.scatter_density(x, y, cmap=white_viridis)
fig.colorbar(density, label='Number of points per pixel')
fig = plt.figure()
using_mpl_scatter_density(fig, x, y)
plt.show()
Drawing this took 0.05 seconds:
And the zoom-in looks quite nice:
2: datashader
Datashader is an interesting project. It has added support for matplotlib in datashader 0.12.
Installation
pip install datashader
Code (source & parameterer listing for dsshow):
import datashader as ds
from datashader.mpl_ext import dsshow
import pandas as pd
def using_datashader(ax, x, y):
df = pd.DataFrame(dict(x=x, y=y))
dsartist = dsshow(
df,
ds.Point("x", "y"),
ds.count(),
vmin=0,
vmax=35,
norm="linear",
aspect="auto",
ax=ax,
)
plt.colorbar(dsartist)
fig, ax = plt.subplots()
using_datashader(ax, x, y)
plt.show()
It took 0.83 s to draw this:
There is also possibility to colorize by third variable. The third parameter for dsshow controls the coloring. See more examples here and the source for dsshow here.
3: scatter_with_gaussian_kde
def scatter_with_gaussian_kde(ax, x, y):
# https://stackoverflow.com/a/20107592/3015186
# Answer by Joel Kington
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
ax.scatter(x, y, c=z, s=100, edgecolor='')
It took 11 minutes to draw this:
4: using_hist2d
import matplotlib.pyplot as plt
def using_hist2d(ax, x, y, bins=(50, 50)):
# https://stackoverflow.com/a/20105673/3015186
# Answer by askewchan
ax.hist2d(x, y, bins, cmap=plt.cm.jet)
It took 0.021 s to draw this bins=(50,50):
It took 0.173 s to draw this bins=(1000,1000):
Cons: The zoomed-in data does not look as good as in with mpl-scatter-density or datashader. Also you have to determine the number of bins yourself.
5: density_scatter
The code is as in the answer by Guillaume.
It took 0.073 s to draw this with bins=(50,50):
It took 0.368 s to draw this with bins=(1000,1000):
Also, if the number of point makes KDE calculation too slow, color can be interpolated in np.histogram2d [Update in response to comments: If you wish to show the colorbar, use plt.scatter() instead of ax.scatter() followed by plt.colorbar()]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import interpn
def density_scatter( x , y, ax = None, sort = True, bins = 20, **kwargs ) :
"""
Scatter plot colored by 2d histogram
"""
if ax is None :
fig , ax = plt.subplots()
data , x_e, y_e = np.histogram2d( x, y, bins = bins, density = True )
z = interpn( ( 0.5*(x_e[1:] + x_e[:-1]) , 0.5*(y_e[1:]+y_e[:-1]) ) , data , np.vstack([x,y]).T , method = "splinef2d", bounds_error = False)
#To be sure to plot all data
z[np.where(np.isnan(z))] = 0.0
# Sort the points by density, so that the densest points are plotted last
if sort :
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
ax.scatter( x, y, c=z, **kwargs )
norm = Normalize(vmin = np.min(z), vmax = np.max(z))
cbar = fig.colorbar(cm.ScalarMappable(norm = norm), ax=ax)
cbar.ax.set_ylabel('Density')
return ax
if "__main__" == __name__ :
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
density_scatter( x, y, bins = [30,30] )
You could make a histogram:
import numpy as np
import matplotlib.pyplot as plt
# fake data:
a = np.random.normal(size=1000)
b = a*3 + np.random.normal(size=1000)
plt.hist2d(a, b, (50, 50), cmap=plt.cm.jet)
plt.colorbar()

Categories