Related
I am trying to animate a pcolormesh in matplotlib. I have seen many of the examples using the package animation, most of them using a 1D plot routine, and some of them with imshow().
First, I wan to use the FuncAnimation routine. My problem is, first, that I do not know if I can initialize the plot
fig,ax = plt.subplots()
quad = ax.pcolormesh(X,Y,Z)
I have tried a few simple lines:
fig,ax = plt.subplots()
quad = ax.pcolormesh([])
def init():
quad.set_array([])
return quad,
def animate(ktime):
quad.set_array(X,Y,np.sin(Z)+ktime)
return quad,
anim = animation.FuncAnimation(fig,animate,init_func=init,frames=Ntime,interval=200,blit=True)
plt.show()
By the way, How do I set labels into and animated plot? Can I animate the title, if it is showing a number that changes in time?
Thanks
The problem was that I was wrongly using set_array() routine. It is very important to note that you must pass a 1D array to this routine. To do so, regarding that color, pcolormesh and so on usually plots multidimensional arrays, you should use .ravel() .
One more important thing: In order to animate different plots at the same time, the blitz option at animate.FuncAnimation must be False (See section "Animating selected plot elements" of this link).
Here I post the code that simple program with various subplots:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.gridspec as gridspec
import matplotlib.animation as animation
y, x = np.meshgrid(np.linspace(-10, 10,100), np.linspace(-10, 10,100))
z = np.sin(x)*np.sin(x)+np.sin(y)*np.sin(y)
v = np.linspace(-10, 10,100)
t = np.sin(v)*np.sin(v)
tt = np.cos(v)*np.cos(v)
###########
fig = plt.figure(figsize=(16, 8),facecolor='white')
gs = gridspec.GridSpec(5, 2)
ax1 = plt.subplot(gs[0,0])
line, = ax1.plot([],[],'b-.',linewidth=2)
ax1.set_xlim(-10,10)
ax1.set_ylim(0,1)
ax1.set_xlabel('time')
ax1.set_ylabel('amplitude')
ax1.set_title('Oscillationsssss')
time_text = ax1.text(0.02, 0.95, '', transform=ax1.transAxes)
#############################
ax2 = plt.subplot(gs[1:3,0])
quad1 = ax2.pcolormesh(x,y,z,shading='gouraud')
ax2.set_xlabel('time')
ax2.set_ylabel('amplitude')
cb2 = fig.colorbar(quad1,ax=ax2)
#########################
ax3 = plt.subplot(gs[3:,0])
quad2 = ax3.pcolormesh(x, y, z,shading='gouraud')
ax3.set_xlabel('time')
ax3.set_ylabel('amplitude')
cb3 = fig.colorbar(quad2,ax=ax3)
############################
ax4 = plt.subplot(gs[:,1])
line2, = ax4.plot(v,tt,'b',linewidth=2)
ax4.set_xlim(-10,10)
ax4.set_ylim(0,1)
def init():
line.set_data([],[])
line2.set_data([],[])
quad1.set_array([])
return line,line2,quad1
def animate(iter):
t = np.sin(2*v-iter/(2*np.pi))*np.sin(2*v-iter/(2*np.pi))
tt = np.cos(2*v-iter/(2*np.pi))*np.cos(2*v-iter/(2*np.pi))
z = np.sin(x-iter/(2*np.pi))*np.sin(x-iter/(2*np.pi))+np.sin(y)*np.sin(y)
line.set_data(v,t)
quad1.set_array(z.ravel())
line2.set_data(v,tt)
return line,line2,quad1
gs.tight_layout(fig)
anim = animation.FuncAnimation(fig,animate,frames=100,interval=50,blit=False,repeat=False)
plt.show()
print 'Finished!!'
There is an ugly detail you need to take care when using QuadMesh.set_array(). If you intantiate your QuadMesh with X, Y and C you can update the values C by using set_array(). But set_array does not support the same input as the constructor. Reading the source reveals that you need to pass a 1d-array and what is even more puzzling is that depending on the shading setting you might need to cut of your array C.
Edit: There is even a very old bug report about the confusing array size for shading='flat'.
That means:
Using QuadMesh.set_array() with shading = 'flat'
'flat' is default value for shading.
# preperation
import numpy as np
import matplotlib.pyplot as plt
plt.ion()
y = np.linspace(-10, 10, num=1000)
x = np.linspace(-10, 10, num=1000)
X, Y = np.meshgrid(x, y)
C = np.ones((1000, 1000)) * float('nan')
# intantiate empty plot (values = nan)
pcmesh = plt.pcolormesh(X, Y, C, vmin=-100, vmax=100, shading='flat')
# generate some new data
C = X * Y
# necessary for shading='flat'
C = C[:-1, :-1]
# ravel() converts C to a 1d-array
pcmesh.set_array(C.ravel())
# redraw to update plot with new data
plt.draw()
Looks like:
Note that if you omit C = C[:-1, :-1] your will get this broken graphic:
Using QuadMesh.set_array() with shading = 'gouraud'
# preperation (same as for 'flat')
import numpy as np
import matplotlib.pyplot as plt
plt.ion()
y = np.linspace(-10, 10, num=1000)
x = np.linspace(-10, 10, num=1000)
X, Y = np.meshgrid(x, y)
C = np.ones((1000, 1000)) * float('nan')
# intantiate empty plot (values = nan)
pcmesh = plt.pcolormesh(X, Y, C, vmin=-100, vmax=100, shading='gouraud')
# generate some new data
C = X * Y
# here no cut of of last row/column!
# ravel() converts C to a 1d-array
pcmesh.set_array(C.ravel())
# redraw to update plot with new data
plt.draw()
If you cut off the last row/column with shade='gouraud' you will get:
ValueError: total size of new array must be unchanged
I am not sure why your quad = ax.pcolormesh(X,Y,Z) function is giving an error. Can you post the error?
Below is what I would do to create a simple animation using pcolormesh:
import matplotlib.pyplot as plt
import numpy as np
y, x = np.meshgrid(np.linspace(-3, 3,100), np.linspace(-3, 3,100))
z = np.sin(x**2+y**2)
z = z[:-1, :-1]
ax = plt.subplot(111)
quad = plt.pcolormesh(x, y, z)
plt.colorbar()
plt.ion()
plt.show()
for phase in np.linspace(0,10*np.pi,200):
z = np.sin(np.sqrt(x**2+y**2) + phase)
z = z[:-1, :-1]
quad.set_array(z.ravel())
plt.title('Phase: %.2f'%phase)
plt.draw()
plt.ioff()
plt.show()
One of the frames:
Does this help? If not, maybe you can clarify the question.
There is another answer presented here that looks simpler thus better (IMHO)
Here is a copy & paste of the alternative solution :
import matplotlib.pylab as plt
from matplotlib import animation
fig = plt.figure()
plt.hold(True)
#We need to prime the pump, so to speak and create a quadmesh for plt to work with
plt.pcolormesh(X[0:1], Y[0:1], C[0:1])
anim = animation.FuncAnimation(fig, animate, frames = range(2,155), blit = False)
plt.show()
plt.hold(False)
def animate( self, i):
plt.title('Ray: %.2f'%i)
#This is where new data is inserted into the plot.
plt.pcolormesh(X[i-2:i], Y[i-2:i], C[i-2:i])
I'd like to make a scatter plot where each point is colored by the spatial density of nearby points.
I've come across a very similar question, which shows an example of this using R:
R Scatter Plot: symbol color represents number of overlapping points
What's the best way to accomplish something similar in python using matplotlib?
In addition to hist2d or hexbin as #askewchan suggested, you can use the same method that the accepted answer in the question you linked to uses.
If you want to do that:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=100)
plt.show()
If you'd like the points to be plotted in order of density so that the densest points are always on top (similar to the linked example), just sort them by the z-values. I'm also going to use a smaller marker size here as it looks a bit better:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=50)
plt.show()
Plotting >100k data points?
The accepted answer, using gaussian_kde() will take a lot of time. On my machine, 100k rows took about 11 minutes. Here I will add two alternative methods (mpl-scatter-density and datashader) and compare the given answers with same dataset.
In the following, I used a test data set of 100k rows:
import matplotlib.pyplot as plt
import numpy as np
# Fake data for testing
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
Output & computation time comparison
Below is a comparison of different methods.
1: mpl-scatter-density
Installation
pip install mpl-scatter-density
Example code
import mpl_scatter_density # adds projection='scatter_density'
from matplotlib.colors import LinearSegmentedColormap
# "Viridis-like" colormap with white background
white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
(0, '#ffffff'),
(1e-20, '#440053'),
(0.2, '#404388'),
(0.4, '#2a788e'),
(0.6, '#21a784'),
(0.8, '#78d151'),
(1, '#fde624'),
], N=256)
def using_mpl_scatter_density(fig, x, y):
ax = fig.add_subplot(1, 1, 1, projection='scatter_density')
density = ax.scatter_density(x, y, cmap=white_viridis)
fig.colorbar(density, label='Number of points per pixel')
fig = plt.figure()
using_mpl_scatter_density(fig, x, y)
plt.show()
Drawing this took 0.05 seconds:
And the zoom-in looks quite nice:
2: datashader
Datashader is an interesting project. It has added support for matplotlib in datashader 0.12.
Installation
pip install datashader
Code (source & parameterer listing for dsshow):
import datashader as ds
from datashader.mpl_ext import dsshow
import pandas as pd
def using_datashader(ax, x, y):
df = pd.DataFrame(dict(x=x, y=y))
dsartist = dsshow(
df,
ds.Point("x", "y"),
ds.count(),
vmin=0,
vmax=35,
norm="linear",
aspect="auto",
ax=ax,
)
plt.colorbar(dsartist)
fig, ax = plt.subplots()
using_datashader(ax, x, y)
plt.show()
It took 0.83 s to draw this:
There is also possibility to colorize by third variable. The third parameter for dsshow controls the coloring. See more examples here and the source for dsshow here.
3: scatter_with_gaussian_kde
def scatter_with_gaussian_kde(ax, x, y):
# https://stackoverflow.com/a/20107592/3015186
# Answer by Joel Kington
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
ax.scatter(x, y, c=z, s=100, edgecolor='')
It took 11 minutes to draw this:
4: using_hist2d
import matplotlib.pyplot as plt
def using_hist2d(ax, x, y, bins=(50, 50)):
# https://stackoverflow.com/a/20105673/3015186
# Answer by askewchan
ax.hist2d(x, y, bins, cmap=plt.cm.jet)
It took 0.021 s to draw this bins=(50,50):
It took 0.173 s to draw this bins=(1000,1000):
Cons: The zoomed-in data does not look as good as in with mpl-scatter-density or datashader. Also you have to determine the number of bins yourself.
5: density_scatter
The code is as in the answer by Guillaume.
It took 0.073 s to draw this with bins=(50,50):
It took 0.368 s to draw this with bins=(1000,1000):
Also, if the number of point makes KDE calculation too slow, color can be interpolated in np.histogram2d [Update in response to comments: If you wish to show the colorbar, use plt.scatter() instead of ax.scatter() followed by plt.colorbar()]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import interpn
def density_scatter( x , y, ax = None, sort = True, bins = 20, **kwargs ) :
"""
Scatter plot colored by 2d histogram
"""
if ax is None :
fig , ax = plt.subplots()
data , x_e, y_e = np.histogram2d( x, y, bins = bins, density = True )
z = interpn( ( 0.5*(x_e[1:] + x_e[:-1]) , 0.5*(y_e[1:]+y_e[:-1]) ) , data , np.vstack([x,y]).T , method = "splinef2d", bounds_error = False)
#To be sure to plot all data
z[np.where(np.isnan(z))] = 0.0
# Sort the points by density, so that the densest points are plotted last
if sort :
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
ax.scatter( x, y, c=z, **kwargs )
norm = Normalize(vmin = np.min(z), vmax = np.max(z))
cbar = fig.colorbar(cm.ScalarMappable(norm = norm), ax=ax)
cbar.ax.set_ylabel('Density')
return ax
if "__main__" == __name__ :
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
density_scatter( x, y, bins = [30,30] )
You could make a histogram:
import numpy as np
import matplotlib.pyplot as plt
# fake data:
a = np.random.normal(size=1000)
b = a*3 + np.random.normal(size=1000)
plt.hist2d(a, b, (50, 50), cmap=plt.cm.jet)
plt.colorbar()
I found this script on the matplotlib website:
"""
Demonstrates using custom hillshading in a 3D surface plot.
"""
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cbook
from matplotlib import cm
from matplotlib.colors import LightSource
import matplotlib.pyplot as plt
import numpy as np
filename = cbook.get_sample_data('jacksboro_fault_dem.npz', asfileobj=False)
with np.load(filename) as dem:
z = dem['elevation']
nrows, ncols = z.shape
x = np.linspace(dem['xmin'], dem['xmax'], ncols)
y = np.linspace(dem['ymin'], dem['ymax'], nrows)
x, y = np.meshgrid(x, y)
region = np.s_[5:50, 5:50]
x, y, z = x[region], y[region], z[region]
fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ls = LightSource(270, 45)
# To use a custom hillshading mode, override the built-in shading and pass
# in the rgb colors of the shaded surface calculated from "shade".
rgb = ls.shade(z, cmap=cm.gist_earth, vert_exag=0.1, blend_mode='soft')
surf = ax.plot_surface(x, y, z, rstride=1, cstride=1, facecolors=rgb,
linewidth=0, antialiased=False, shade=False)
plt.show()
They use the file jacksboro_fault_dem.npz to plot the elevation data and they get something like that:
Thanks to Google Earth I was able to get the text file maido_elevation_data.txt with latitude, longitude and elevation data of the following area (Maïdo, Reunion Island):
I made a function to get 3 lists for each coordinate from the text file:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
def get_LAT_LONG_ALT(text_file):
ch=""
LAT=[]
LONG=[]
ALT=[]
with open(text_file,"r") as fich:
for ligne in fich:
for e in ligne:
ch+=e
liste=ch.replace("\n","").split("\t")
LAT.append(float(liste[0]))
LONG.append(float(liste[1]))
ALT.append(float(liste[2]))
ch=""
return LAT,LONG,ALT
fig = plt.figure()
axes = fig.add_subplot(111, projection="3d")
X = get_LAT_LONG_ALT("maido_elevation_data.txt")[0]
Y = get_LAT_LONG_ALT("maido_elevation_data.txt")[1]
Z = get_LAT_LONG_ALT("maido_elevation_data.txt")[2]
axes.scatter(X,Y,Z, c="r", marker="o")
axes.set_xlabel("Latitude")
axes.set_ylabel("Longitude")
axes.set_zlabel("Altitude")
plt.show()
How should I modify the script to get a good surface plot with my own data like they do?
PS: I will give you the links of the files in the comments because I'm not allowed to put more than 2 links... yes, I'm new :)
You should reshape your data it is a three column data x,y and z
You should have a file with only z values in a 2D table columns are x and rows are y.
Meshgrid fucntion in python should help.
I seems that it is not possible to change colors of a Matplotlib scatter plot through a RGB definition. Am I wrong?
Here is a code (already given in stack overflow) which work with colors indexed in float:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.animation as animation
def main():
numframes = 100
numpoints = 10
color_data = np.random.random((numframes, numpoints))
x, y, c = np.random.random((3, numpoints))
fig = plt.figure()
scat = plt.scatter(x, y, c=c, s=100)
ani = animation.FuncAnimation(fig, update_plot, frames=range(numframes),
fargs=(color_data, scat))
plt.show()
def update_plot(i, data, scat):
scat.set_array(data[i])
return scat,
main()
But if color_data is defined through RGB colors, I get an error:
ValueError: Collections can only map rank 1 arrays
The related code is the following (in this code, I just change the color of one sample each time):
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.animation as animation
def main():
numframes = 100
numpoints = 10
rgb_color_data = np.random.random((numpoints, 3))
x, y = np.random.random((2, numpoints))
fig = plt.figure()
scat = plt.scatter(x, y, c=rgb_color_data, s=100) #this work well at this level
ani = animation.FuncAnimation(fig, update_plot2, frames=range(numframes),
fargs=(rgb_color_data, scat))
plt.show()
def update_plot2(i,data,scat):
data[ i%10 ] = np.random.random((3))
scat.set_array(data) # this fails
return scat,
main()
Is there a means to use set_array with RGB color array?
Not sure what you are trying to achieve. But if you are trying to change the color, why not use the set_color() function of Collection?
def update_plot2(i,data,scat):
data[ i%10 ] = np.random.random((3))
scat.set_color(data) # <<<<<<<<<<<<<<<<<<<
return scat,
I'd like to make a scatter plot where each point is colored by the spatial density of nearby points.
I've come across a very similar question, which shows an example of this using R:
R Scatter Plot: symbol color represents number of overlapping points
What's the best way to accomplish something similar in python using matplotlib?
In addition to hist2d or hexbin as #askewchan suggested, you can use the same method that the accepted answer in the question you linked to uses.
If you want to do that:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=100)
plt.show()
If you'd like the points to be plotted in order of density so that the densest points are always on top (similar to the linked example), just sort them by the z-values. I'm also going to use a smaller marker size here as it looks a bit better:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Generate fake data
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)
# Calculate the point density
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
fig, ax = plt.subplots()
ax.scatter(x, y, c=z, s=50)
plt.show()
Plotting >100k data points?
The accepted answer, using gaussian_kde() will take a lot of time. On my machine, 100k rows took about 11 minutes. Here I will add two alternative methods (mpl-scatter-density and datashader) and compare the given answers with same dataset.
In the following, I used a test data set of 100k rows:
import matplotlib.pyplot as plt
import numpy as np
# Fake data for testing
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
Output & computation time comparison
Below is a comparison of different methods.
1: mpl-scatter-density
Installation
pip install mpl-scatter-density
Example code
import mpl_scatter_density # adds projection='scatter_density'
from matplotlib.colors import LinearSegmentedColormap
# "Viridis-like" colormap with white background
white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
(0, '#ffffff'),
(1e-20, '#440053'),
(0.2, '#404388'),
(0.4, '#2a788e'),
(0.6, '#21a784'),
(0.8, '#78d151'),
(1, '#fde624'),
], N=256)
def using_mpl_scatter_density(fig, x, y):
ax = fig.add_subplot(1, 1, 1, projection='scatter_density')
density = ax.scatter_density(x, y, cmap=white_viridis)
fig.colorbar(density, label='Number of points per pixel')
fig = plt.figure()
using_mpl_scatter_density(fig, x, y)
plt.show()
Drawing this took 0.05 seconds:
And the zoom-in looks quite nice:
2: datashader
Datashader is an interesting project. It has added support for matplotlib in datashader 0.12.
Installation
pip install datashader
Code (source & parameterer listing for dsshow):
import datashader as ds
from datashader.mpl_ext import dsshow
import pandas as pd
def using_datashader(ax, x, y):
df = pd.DataFrame(dict(x=x, y=y))
dsartist = dsshow(
df,
ds.Point("x", "y"),
ds.count(),
vmin=0,
vmax=35,
norm="linear",
aspect="auto",
ax=ax,
)
plt.colorbar(dsartist)
fig, ax = plt.subplots()
using_datashader(ax, x, y)
plt.show()
It took 0.83 s to draw this:
There is also possibility to colorize by third variable. The third parameter for dsshow controls the coloring. See more examples here and the source for dsshow here.
3: scatter_with_gaussian_kde
def scatter_with_gaussian_kde(ax, x, y):
# https://stackoverflow.com/a/20107592/3015186
# Answer by Joel Kington
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
ax.scatter(x, y, c=z, s=100, edgecolor='')
It took 11 minutes to draw this:
4: using_hist2d
import matplotlib.pyplot as plt
def using_hist2d(ax, x, y, bins=(50, 50)):
# https://stackoverflow.com/a/20105673/3015186
# Answer by askewchan
ax.hist2d(x, y, bins, cmap=plt.cm.jet)
It took 0.021 s to draw this bins=(50,50):
It took 0.173 s to draw this bins=(1000,1000):
Cons: The zoomed-in data does not look as good as in with mpl-scatter-density or datashader. Also you have to determine the number of bins yourself.
5: density_scatter
The code is as in the answer by Guillaume.
It took 0.073 s to draw this with bins=(50,50):
It took 0.368 s to draw this with bins=(1000,1000):
Also, if the number of point makes KDE calculation too slow, color can be interpolated in np.histogram2d [Update in response to comments: If you wish to show the colorbar, use plt.scatter() instead of ax.scatter() followed by plt.colorbar()]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import interpn
def density_scatter( x , y, ax = None, sort = True, bins = 20, **kwargs ) :
"""
Scatter plot colored by 2d histogram
"""
if ax is None :
fig , ax = plt.subplots()
data , x_e, y_e = np.histogram2d( x, y, bins = bins, density = True )
z = interpn( ( 0.5*(x_e[1:] + x_e[:-1]) , 0.5*(y_e[1:]+y_e[:-1]) ) , data , np.vstack([x,y]).T , method = "splinef2d", bounds_error = False)
#To be sure to plot all data
z[np.where(np.isnan(z))] = 0.0
# Sort the points by density, so that the densest points are plotted last
if sort :
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
ax.scatter( x, y, c=z, **kwargs )
norm = Normalize(vmin = np.min(z), vmax = np.max(z))
cbar = fig.colorbar(cm.ScalarMappable(norm = norm), ax=ax)
cbar.ax.set_ylabel('Density')
return ax
if "__main__" == __name__ :
x = np.random.normal(size=100000)
y = x * 3 + np.random.normal(size=100000)
density_scatter( x, y, bins = [30,30] )
You could make a histogram:
import numpy as np
import matplotlib.pyplot as plt
# fake data:
a = np.random.normal(size=1000)
b = a*3 + np.random.normal(size=1000)
plt.hist2d(a, b, (50, 50), cmap=plt.cm.jet)
plt.colorbar()