I am drawing a map using basemap from matplotlib. The data are spread all over the world, but I just want to retain the data on the continents and drop the data over the ocean. Is there a way to filter the data, or is there a way to draw the ocean again to cover the data?
There's a method in matplotlib.basemap: is_land(xpt, ypt)
It returns True if the given x,y point (in projection coordinates) is over land, False otherwise. The definition of land is based upon the GSHHS coastline polygons associated with the class instance. Points over lakes inside land regions are not counted as land points.
For more information, see here.
is_land() loops over all the polygons to check whether a point is on land or not. For large data sets this is very slow. You can use points_inside_poly() from matplotlib to check an array of points quickly. Here is the code. It doesn't check lakepolygons; if you want to remove points in lakes, you can add that yourself.
It took 2.7 seconds to check 100000 points on my PC. If you want more speed, you can convert the polygons into a bitmap, but that is a little difficult to do. Please tell me if the following code is not fast enough for your dataset.
from mpl_toolkits.basemap import Basemap
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.nxutils as nx
def points_in_polys(points, polys):
    """Return the rows of ``points`` that lie inside any polygon in ``polys``.

    Parameters
    ----------
    points : array-like
        Point coordinates, one point per row.
    polys : iterable
        Polygon boundaries, each passed as-is to ``nx.points_inside_poly``.

    Returns
    -------
    numpy.ndarray
        The matching rows, grouped by the polygon that matched them (in the
        order of ``polys``). When nothing matches, the result is an empty
        array that still has the column layout of ``points``, so callers can
        index ``result[:, 0]`` without special-casing.
    """
    remaining = np.asarray(points)
    matched = []
    for poly in polys:
        inside = nx.points_inside_poly(remaining, poly)
        matched.append(remaining[inside])
        # A point that already matched is dropped from the working set, so
        # later polygons test fewer points and no point is returned twice.
        remaining = remaining[~inside]
    if not matched:
        # No polygons at all: return an empty slice with the input's layout.
        return remaining[:0]
    return np.concatenate(matched)
# 100000 random integer (lon, lat) pairs, both drawn from [0, 90).
points = np.random.randint(0, 90, size=(100000, 2))

# Mollweide world map at crude resolution, with coastlines and filled land.
m = Basemap(projection='moll', lon_0=0, resolution='c')
m.drawcoastlines()
m.fillcontinents(color='coral', lake_color='aqua')

# Project the points into map coordinates and stack them as (x, y) rows.
lon, lat = points[:, 0], points[:, 1]
x, y = m(lon, lat)
loc = np.column_stack((x, y))

# Keep only the points that fall inside one of the land polygons.
polys = [land.boundary for land in m.landpolygons]
land_loc = points_in_polys(loc, polys)

land_x, land_y = land_loc[:, 0], land_loc[:, 1]
m.plot(land_x, land_y, 'ro')
plt.show()
HYRY's answer won't work on newer versions of matplotlib (nxutils is deprecated). I've made a new version that works:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
from matplotlib.path import Path
import numpy as np
# Named ``bmap`` rather than ``map`` so the builtin ``map`` is not shadowed.
bmap = Basemap(projection='cyl', resolution='c')

# Sample locations to classify (longitude / latitude pairs).
lons = [0., 0., 16., 76.]
lats = [0., 41., 19., 51.]

x, y = bmap(lons, lats)
locations = np.c_[x, y]

# One Path per land polygon of the basemap instance.
polygons = [Path(p.boundary) for p in bmap.landpolygons]

# result[i] ends up True when location i lies inside at least one polygon.
result = np.zeros(len(locations), dtype=bool)
for polygon in polygons:
    # Logical OR accumulates the per-polygon membership masks.
    result |= polygon.contains_points(locations)

print(result)
The simplest way is to use basemap's maskoceans.
If for each lat, lon you have a data and you want to
use contours:
After meshgrid and interpolation:
from scipy.interpolate import griddata as gd
from mpl_toolkits.basemap import Basemap, cm, maskoceans
# Expand the 1-D grid axes into 2-D coordinate arrays.
xi, yi = np.meshgrid(xi, yi)

# Interpolate the scattered scores onto the grid.
sample_coords = (mlon, mlat)
zi = gd(sample_coords, scores, (xi, yi), method=grid_interpolation_method)

# Mask the grid nodes that lie over the ocean.
data = maskoceans(xi, yi, zi)

# Contour the masked array (data), not the raw interpolation (zi).
con = m.contourf(xi, yi, data, cmap=cm.GMT_red2green)
Update (much faster than in_land or in_polygon solutions):
If for each lat, lon you don't have any data, and you just want to scatter the points only over land:
samples = len(lons)
x, y = m(lons, lats)

# maskoceans needs a data array to mask; a running index serves as a
# placeholder because only the resulting mask is of interest here.
ocean = maskoceans(lons, lats, datain=np.arange(samples), resolution='i')

ocean_samples = np.ma.count_masked(ocean)
print('{0} of {1} points in ocean'.format(ocean_samples, samples))

# Scatter only the entries that were not masked.
keep = ~ocean.mask
m.scatter(x[keep], y[keep], marker='.', color=colors[keep], s=1)
m.drawcountries()
m.drawcoastlines(linewidth=0.7)
plt.savefig('a.png')
I was answering this question, when I was told that it would be better to post my answer over here. Basically, my solution extracts the polygons that are used to draw the coastlines of the Basemap instance and combines these polygons with the outline of the map to produce a matplotlib.PathPatch that overlays the ocean areas of the map.
This is especially useful if the data is coarse and interpolation of the data is not wanted. In this case, using maskoceans produces a very grainy outline of the coastlines, which does not look very good.
Here is the same example I posted as answer for the other question:
from matplotlib import pyplot as plt
from mpl_toolkits import basemap as bm
from matplotlib import colors
import numpy as np
import numpy.ma as ma
from matplotlib.patches import Path, PathPatch
fig, ax = plt.subplots()

lon_0 = 319
lat_0 = 72

## some fake data on a coarse 10 x 5 lon/lat grid
lons = np.linspace(lon_0 - 60, lon_0 + 60, 10)
lats = np.linspace(lat_0 - 15, lat_0 + 15, 5)
lon, lat = np.meshgrid(lons, lats)
TOPO = np.sin(np.pi * lon / 180) * np.exp(lat / 90)

m = bm.Basemap(
    resolution='i', projection='laea',
    width=1500000, height=2900000,
    lat_ts=60, lat_0=lat_0, lon_0=lon_0,
    ax=ax,
)
m.drawcoastlines(linewidth=0.5)

x, y = m(lon, lat)
pcol = ax.pcolormesh(x, y, TOPO)

## rectangle spanning the current axes limits (the map edges)
x0, x1 = ax.get_xlim()
y0, y1 = ax.get_ylim()
map_edges = np.array([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])

## map edges first, followed by every coastline polygon of the map
polys = [map_edges] + [land.boundary for land in m.landpolygons]

## one compound path: each polygon opens with MOVETO and continues with LINETO
codes_lin = []
for poly in polys:
    codes_lin.append(Path.MOVETO)
    codes_lin.extend([Path.LINETO] * (len(poly) - 1))
polys_lin = np.vstack(polys)

path = Path(polys_lin, codes_lin)
patch = PathPatch(path, facecolor='white', lw=0)

## masking the data: draw the white patch on top of the pcolormesh
ax.add_patch(patch)

plt.show()
This produces the following plot:
Hope this is helpful to someone :)
Related
I have a polygon shapefile (the state of Illinois) and a CSV file with (lat, lon, zvalue). I want to plot a smooth contour plot representing those zvalues. Following is my code:
import glob
import fiona
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib.mlab import griddata
# Read in the tabulated data (first CSV found in Outputs)
tabfname = glob.glob("Outputs\\*.csv")[0]
df = pd.read_table(tabfname, sep=",")
print(df.head())
lat, lon, z = list(df.y), list(df.x), list(df["Theil Sen Slope"])
# Colorbar tick positions: slightly inside the data range, plus the mean.
z0, z1, z2 = np.min(z)+0.03, np.mean(z), np.max(z)-0.01

# Read the bounding box of the shapefile (first .shp found in GIS)
shp = glob.glob("GIS\\*.shp")[0]
with fiona.drivers():
    with fiona.open(shp) as src:
        bnds = src.bounds
        extent = list(bnds)

# Map centre and a half-degree margin around the shapefile bounds.
lono = np.mean([extent[0], extent[2]])
lato = np.mean([extent[1], extent[3]])
llcrnrlon = extent[0]-0.5
llcrnrlat = extent[1]-0.5
urcrnrlon = extent[2]+0.5
urcrnrlat = extent[3]+0.5

# Create a Basemap
fig = plt.figure()
ax = fig.add_subplot(111)
m = Basemap(llcrnrlon=llcrnrlon, llcrnrlat=llcrnrlat,
            urcrnrlon=urcrnrlon, urcrnrlat=urcrnrlat,
            resolution='i', projection='tmerc', lat_0=lato, lon_0=lono)

# Read in and display the shapefile
m.readshapefile(shp.split(".")[0], 'shf', zorder=2, drawbounds=True)

# Compute the number of bins to aggregate data
nx = 100
ny = 100

# Create a mesh (in degrees) and interpolate the data onto it
xi = np.linspace(llcrnrlon, urcrnrlon, nx)
yi = np.linspace(llcrnrlat, urcrnrlat, ny)
xgrid, ygrid = np.meshgrid(xi, yi)
xs, ys = m(xgrid, ygrid)
zs = griddata(lon, lat, z, xgrid, ygrid, interp='nn')

# Plot the contour map
conf = m.contourf(xs, ys, zs, 30, zorder=1, cmap='jet')
cbar = m.colorbar(conf, location='bottom', pad="5%", ticks=(z0, z1, z2))

# Scatter plot of the points that make up the contour.
# All points are projected and drawn in a single call instead of one
# scatter call per point.
X, Y = m(lon, lat)
m.scatter(X, Y, zorder=4, color='black', s=1)

plt.show()
fig.savefig("Myplot.png", format="png")
And this is the output I got. (The scattered black dots show the spatial distribution of the points from which the interpolation was generated. I used the Nearest Neighbor interpolation method here.)
I basically referred to the examples given in the following two links to plot this:
https://gist.github.com/urschrei/29cd446ae8a8ec60ddbc
https://matplotlib.org/basemap/users/examples.html
Now this image has 3 problems:
The interpolated contour does not expand within the whole of the shapefile
The part of the contour plot protruding out of the shapefile boundary is not masked off
The contour is not smooth.
What I want is to overcome these three deficiencies of my plot and generate a smooth and nice looking plot similar to the ones shown below (Source: https://doi.org/10.1175/JCLI3557.1 ):
How do I achieve that?
I'm trying to create a contour plot on a North Polar Stereographic map projection using Cartopy. I used add_cyclic_point() to try and get around the problem of having a gap between longitude 0 and longitude 35X and followed an example from the documentation (always_circular_stereographic) to set up the map axes.
When I call plt.contour, I get the following plot. It looks like the contour plotter is getting confused at the transition from 355 to 0 longitude, and sends contour lines around the globe.
Here is my code:
import numpy as np
import cartopy.crs as ccrs
from cartopy.util import add_cyclic_point
import matplotlib.pyplot as plt
def define_map():
    """Create a figure holding a circular North Polar Stereographic map.

    Returns
    -------
    tuple
        ``(fig, ax)``: the new 10x10 figure and its map axes, with
        coastlines drawn and the axes boundary set to a circle.
    """
    from matplotlib.path import Path

    fig = plt.figure(figsize=(10, 10))
    ax = plt.axes(projection=ccrs.NorthPolarStereo())
    ax.coastlines()

    # From example: http://scitools.org.uk/cartopy/docs/latest/examples/always_circular_stereo.html
    # Circle of radius 0.5 centred at (0.5, 0.5) in axes coordinates.
    angles = np.linspace(0, 2 * np.pi, 100)
    unit_circle = np.column_stack((np.sin(angles), np.cos(angles)))
    center = [0.5, 0.5]
    radius = 0.5
    boundary = Path(unit_circle * radius + center)
    ax.set_boundary(boundary, transform=ax.transAxes)

    return fig, ax
# Latitudes 65..90 and longitudes 0..355 in 5-degree steps; the cyclic
# point is appended to the longitudes as well as to the data.
lats = np.arange(65, 91, 5)
lons = add_cyclic_point(np.arange(0, 359, 5))

# Random field with one column fewer than lons, then closed cyclically.
n_lat = len(lats)
n_lon = len(lons) - 1
data = add_cyclic_point(np.random.random((n_lat, n_lon)))

fig, ax = define_map()
plt.contour(lons, lats, data, 5,
            transform=ccrs.PlateCarree(), cmap=plt.cm.Blues)
plt.colorbar(fraction=0.05, shrink=0.9)
plt.show()
How do I do a Cartopy contour plot properly?
Also, why do the contours only show up with transform=ccrs.PlateCarree() and not with transform=ccrs.NorthPolarStereo()?
Apparently the add_cyclic_point function is just for the data; the contour routine treats 0 different than 360. So the simple fix is to set
# Plain longitudes 0, 5, ..., 355; add_cyclic_point is not applied to them.
lons = np.arange(0,360,5)
I'm making a contour plot that is clipped to a polygon path:
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import numpy as np
fig = plt.figure()
axes = plt.subplot()

# Radially symmetric test field on a 51 x 51 grid over [-10, 10]^2.
grid = np.linspace(-10, 10, 51)
x, y = np.meshgrid(grid, grid)
z = np.sin(np.sqrt(x**2 + y**2))

contour_levels = np.linspace(-1, 1, 11)
CS = axes.contour(x, y, z, contour_levels)
axes.set_aspect('equal')

# clip contours by polygon
radius = 8
t = np.linspace(0, 2*np.pi, 101)
x_bound = radius*np.sin(t)
y_bound = radius*(np.cos(t)+0.1*(np.cos(7*t)))
boundary_xy = list(zip(x_bound, y_bound))
clip_map = Polygon(boundary_xy, fc='#EEEEEE', ec='none')
axes.add_patch(clip_map)

for collection in CS.collections:
    collection.set_clip_path(clip_map)

# label the contours
CLB = axes.clabel(CS, colors='black')
for text_object in CLB:
    text_object.set_clip_path(clip_map)  # Doesn't do anything!

plt.show()
To my surprise, the labels aren't clipped despite the Text objects having a set_clip_path method that doesn't return an error:
How can I clip the labels outside of the gray polygon area? Do I need to resort to manually finding the X and Y positions, calculating point in polygon, and set_visible = False for each Text item? Why doesn't this code work as-is? I'm using matplotlib version 1.5.1 and python 3.5.1.
Just in case someone comes across the same issue someday, here's a solution that resorts to having to use the shapely package to test for point in polygon to set the visibility state of the Text object. It gets the job done, but it would be nice if it was possible to use set_clip_path to work directly on the Text object.
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import numpy as np
from shapely.geometry import Polygon as ShapelyPolygon
from shapely.geometry import Point as ShapelyPoint
fig = plt.figure()
axes = plt.subplot()

# Radially symmetric test field on a 51 x 51 grid over [-10, 10]^2.
grid = np.linspace(-10, 10, 51)
x, y = np.meshgrid(grid, grid)
z = np.sin(np.sqrt(x**2 + y**2))

contour_levels = np.linspace(-1, 1, 11)
CS = axes.contour(x, y, z, contour_levels)
axes.set_aspect('equal')

# clip contours by polygon
radius = 8
t = np.linspace(0, 2*np.pi, 101)
x_bound = radius*np.sin(t)
y_bound = radius*(np.cos(t)+0.1*(np.cos(7*t)))
boundary_xy = list(zip(x_bound, y_bound))
clip_map = Polygon(boundary_xy, fc='#EEEEEE', ec='none')
axes.add_patch(clip_map)

for collection in CS.collections:
    collection.set_clip_path(clip_map)

# label the contours, then hide every label anchored outside the polygon
CLB = axes.clabel(CS, colors='black')
clip_map_shapely = ShapelyPolygon(clip_map.get_xy())
for text_object in CLB:
    anchor = ShapelyPoint(text_object.get_position())
    if clip_map_shapely.contains(anchor):
        continue
    text_object.set_visible(False)

plt.show()
I'm struggling with creating a quite complex 3d figure in python, specifically using iPython notebook. I can partition the content of the graph into two sections:
The (x,y) plane: Here a two-dimensional random walk is bobbing around, let's call it G(). I would like to plot part of this trajectory on the (x,y) plane. Say, 10% of all the data points of G(). As G() bobs around, it visits some (x,y) pairs more frequently than others. I would like to estimate this density of G() using a kernel estimation approach and draw it as contour lines on the (x,y) plane.
The (z) plane: Here, I would like to draw a mesh or (transparent) surface plot of the information theoretical surprise of a bivariate normal. Surprise is simply -log(p(i)) or the negative (base 2) logarithm of outcome i. Given the bivariate normal, each (x,y) pair has some probability p(x,y) and the surprise of this is simply -log(p(x,y)).
Essentially these two graphs are independent. Assume the interval of the random walk G() is [xmin,xmax],[ymin,ymax] and of size N. The bivariate normal in the z-plane should be drawn from the same interval, such that for each (x,y) pair in the random walk, I can draw a (dashed) line from some subset of the random walk n < N to the bivariate normal. Assume that G(10) = (5,5) then I would like to draw a dashed line from (5,5) up the Z-axes, until it hits the bivariate normal.
So far, I've managed to plot G() in a 3-d space, and estimate the density f(X,Y) using scipy.stats.gaussian_kde. In another (2d) graph, I have the sort of contour lines I want. What I don't have, is the contour lines in the 3d-plot using the estimated KDE density. I also don't have the bivariate normal plot, or the projection of a few random points from the random walk, to the surface of the bivariate normal. I've added a hand drawn figure, which might ease intuition (ignore the label on the z-axis and the fact that there is no mesh.. difficult to draw!)
Any input, even just partial, such as how to draw the contour lines in the (x,y) plane of the 3d graph, or a mesh of a bivariate normal would be much appreciated.
Thanks!
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import seaborn as sns
import scipy
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
def randomwalk():
    """Simulate a 400-step 2-D lattice walk and plot it with KDE contours.

    The first figure shows the walk on a 3-D axes, with a seaborn KDE call
    and the end point marked; the second figure shows the seaborn KDE plot
    alone. Takes no arguments and returns nothing.
    """
    mpl.rcParams['legend.fontsize'] = 10

    # Each step moves one unit forwards or backwards along a random axis.
    position = [0, 0]
    visited = []
    for _ in range(400):
        axis = random.randrange(0, 2)
        position[axis] += random.choice([-1, 1])
        visited.append(list(position))

    x, y = zip(*visited)
    data = np.vstack([x, y])
    xs, ys = data[0, :], data[1, :]

    # Kernel density estimate evaluated at the visited points
    # (computed here but not used by the plots below).
    kde = scipy.stats.gaussian_kde(data)
    density = kde(data)

    fig1 = plt.figure()
    ax = fig1.gca(projection='3d')
    ax.plot(x, y, label='Random walk')
    sns.kdeplot(xs, ys, 0)
    ax.scatter(x[-1], y[-1], c='b', marker='o')  # End point
    ax.legend()

    fig2 = plt.figure()
    sns.kdeplot(xs, ys)
Calling randomwalk() initialises and plots this:
Edit #1:
Made some progress, actually the only thing I need is to restrict the height of the dashed vertical lines to the bivariate. Any ideas?
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import seaborn as sns
import scipy
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.mlab import bivariate_normal
%matplotlib inline
# Data for random walk
def randomwalk():
    """Plot a 40-step lattice walk under a bivariate-normal surprise surface.

    Draws the walk and its end point on a 3-D axes, adds the transparent
    surface ``-log(bivariate_normal(X, Y))`` and, for five randomly chosen
    walk positions, a dashed vertical line from z=0 to z=10. Takes no
    arguments and returns nothing.
    """
    mpl.rcParams['legend.fontsize'] = 10

    # Each step moves one unit forwards or backwards along a random axis.
    position = [0, 0]
    visited = []
    for _ in range(40):
        axis = random.randrange(0, 2)
        position[axis] += random.choice([-1, 1])
        visited.append(list(position))

    # Get density (computed here but not used by the plot below)
    x, y = zip(*visited)
    data = np.vstack([x, y])
    kde = scipy.stats.gaussian_kde(data)
    density = kde(data)

    # Data for bivariate gaussian on a 20 x 20 grid over [-7.5, 7.5]^2
    a = np.linspace(-7.5, 7.5, 20)
    b = a
    X, Y = np.meshgrid(a, b)
    Z = bivariate_normal(X, Y)
    surprise_Z = -np.log(Z)

    # Five random positions picked from the first 20 steps of the walk
    picks = np.random.choice(20, 5)
    M = data[:, picks].T

    # Plot figure
    fig = plt.figure(figsize=(10, 7))
    ax = fig.gca(projection='3d')
    ax.plot(x, y, 'grey', label='Random walk')  # Walker
    ax.scatter(x[-1], y[-1], c='k', marker='o')  # End point
    ax.legend()
    surf = ax.plot_surface(X, Y, surprise_Z, rstride=1, cstride=1,
                           cmap=plt.cm.gist_heat_r, alpha=0.1, linewidth=0.1)
    # Optional colorbar, disabled:
    #fig.colorbar(surf, shrink=0.5, aspect=7, cmap=plt.cm.gray_r)

    # Dashed vertical line of fixed height above each picked position
    for px, py in M:
        ax.plot([px, px], [py, py], [0, 10], 'k--', alpha=0.8, linewidth=0.5)

    ax.set_zlim(0, 50)
    ax.set_xlim(-10, 10)
    ax.set_ylim(-10, 10)
Final code,
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import seaborn as sns
import scipy
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.mlab import bivariate_normal
%matplotlib inline
# Data for random walk
def randomwalk(n_steps=50, n_samples=10):
    """Plot a lattice random walk under a bivariate-normal surprise surface.

    The walk is drawn on a 3-D axes together with the transparent surface
    ``-log(bivariate_normal(X, Y))``. For ``n_samples`` randomly chosen walk
    positions a dashed vertical line is drawn from z=0 up to the surface,
    with a marker at both ends.

    Parameters
    ----------
    n_steps : int, optional
        Number of steps in the walk (must be at least 1). Defaults to 50.
    n_samples : int, optional
        Number of walk positions (drawn with replacement) that are projected
        onto the surface. Defaults to 10.
    """
    mpl.rcParams['legend.fontsize'] = 10

    # Data for random walk: each step moves +/-1 along a random axis.
    cur = [0, 0]
    xyz = []
    for _ in range(n_steps):
        axis = random.randrange(0, 2)
        cur[axis] += random.choice([-1, 1])
        xyz.append(cur[:])
    walk_x, walk_y = zip(*xyz)
    data = np.vstack([walk_x, walk_y])

    # Data for bivariate gaussian on a 100 x 100 grid over [-7.5, 7.5]^2
    a = np.linspace(-7.5, 7.5, 100)
    X, Y = np.meshgrid(a, a)
    surprise_Z = -np.log(bivariate_normal(X, Y))

    # Random walk positions to project up the z-axis; the index range is
    # tied to n_steps so it always matches the length of the walk.
    M = data[:, np.random.choice(n_steps, n_samples)].T

    # Plot figure
    fig = plt.figure(figsize=(10, 7))
    ax = fig.gca(projection='3d')
    ax.plot(walk_x, walk_y, 'grey', label='Random walk')  # Walker
    ax.legend()
    ax.plot_surface(X, Y, surprise_Z, rstride=1, cstride=1,
                    cmap=plt.cm.gist_heat_r, alpha=0.1, linewidth=0.1)

    for px, py in M:
        # Height of the surface directly above (px, py).
        top = -np.log(bivariate_normal(px, py))
        line_x, line_y, line_z = [px, px], [py, py], [0, top]
        ax.plot(line_x, line_y, line_z, 'k--', alpha=0.8, linewidth=0.5)
        ax.scatter(line_x, line_y, line_z, c='k', marker='o')
I have a point cloud of magnetization directions with azimut (declination between 0° and 360°) and inclination between 0° and 90°. I display these points in a polar azimuthal equidistant projection (using matplotlib basemap). That means 90° inclination will point directly in the center of the plot and the declination runs clockwise.
My problem is that I also want to plot isolines around these point clouds, which should represent where the highest density of points/directions is located. What is the easiest way to do this? It would be nice to mark the isoline which encircles 50% of my data. If I am not mistaken, this would be the median.
So far I've fiddled around with gaussian_kde and the outlier detection of sklearn (1 and 2), but the results are not as expected.
Any ideas?
Edit #1:
First gaussian_kde
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from mpl_toolkits.basemap import Basemap
m = Basemap(projection='spaeqd', boundinglat=0, lon_0=180,
            resolution='l', round=True)
m.drawparallels(np.arange(-80., 1., 10.), labels=[False, True, True, False])
m.drawmeridians(np.arange(-180., 181., 30.), labels=[True, False, False, True])

# data: m2 is negated so that the points plot in the southern hemisphere
x, y = m(m1, -m2)

# grid on which the KDE is evaluated
yi = np.arange(0, 360.1, 1)
xi = np.arange(-90, 1, 1)
xx, yy = np.meshgrid(xi, yi)
# NOTE(review): xi spans -90..0 and yi spans 0..360, so this call appears to
# pass latitudes as the first argument and longitudes as the second - confirm.
X, Y = m(xx, yy)  # into the basemap projection

# set up the gaussian KDE and evaluate it on the grid
# (closely follows the scipy.stats docs)
grid_points = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([x, y])
kernel = stats.gaussian_kde(values)
Z = np.reshape(kernel(grid_points).T, X.shape)

# plot original points and probability density function
ax = plt.gca()
ax.scatter(x, y, c='Crimson')
TOT = ax.contour(X, Y, Z, cmap=plt.cm.Reds)
plt.show()
Then sklearn:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from mpl_toolkits.basemap import Basemap
from sklearn import svm
from sklearn.covariance import EllipticEnvelope
m = Basemap(projection='spaeqd', boundinglat=0, lon_0=180,
            resolution='l', round=True)
m.drawparallels(np.arange(-80., 1., 10.), labels=[False, True, True, False])
m.drawmeridians(np.arange(-180., 181., 30.), labels=[True, False, False, True])

# data: m2 is negated so that the points plot in the southern hemisphere
x, y = m(m1, -m2)

# Similar to the examples in the sklearn docs
outliers_fraction = 0.5
nu = 0.95 * outliers_fraction + 0.05
oneclass_svm = svm.OneClassSVM(nu=nu, kernel="rbf", gamma=0.1, verbose=True)

# set up the evaluation grid in projection coordinates
yi = np.arange(0, 360.1, 1)
xi = np.arange(-90, 1, 1)
R, T = np.meshgrid(xi, yi)
xx, yy = m(T, R)

# standardize the data as suggested by the docs
x_std = (x - x.mean()) / x.std()
y_std = (y - y.mean()) / y.std()
values = np.vstack([x_std, y_std])
samples = values.T

# fit the data and compute the threshold at the outliers_fraction percentile
# of the decision function (intended to mark the median)
oneclass_svm.fit(samples)
y_pred = oneclass_svm.decision_function(samples).ravel()
threshold = stats.scoreatpercentile(y_pred, 100 * outliers_fraction)
y_pred = y_pred > threshold

# target vector for evaluation; standardized with the grid's own mean/std
TV = np.c_[xx.ravel(), yy.ravel()]
TV = (TV - TV.mean(axis=0)) / TV.std(axis=0)

# evaluation - the result is shifted in the plot and no longer fits the
# point cloud, because of the standardization
Z = oneclass_svm.decision_function(TV)
Z = Z.reshape(xx.shape)

# plotting - very similar to the example in the docs
ax = plt.gca()
below_levels = np.linspace(Z.min(), threshold, 7)
ax.contourf(xx, yy, Z, levels=below_levels, cmap=plt.cm.Blues_r)
ax.contour(xx, yy, Z, levels=[threshold], linewidths=2, colors='red')
ax.contourf(xx, yy, Z, levels=[threshold, Z.max()], colors='orange')
ax.scatter(x, y, s=30, marker='s', c='RoyalBlue', label='Mr')
plt.show()
The EllipticEnvelope works, but it is not what I want.
OK, I think I might have found a solution. But it will not work in every case: in my opinion it should fail when the data is multimodally distributed.
Nevertheless, here is my thought process:
The Probability Density Function (PDF) is essentially the same as a continuous histogram. So I used np.percentile to calculate the upper and lower quartiles (25th and 75th percentiles) of both vectors. Then I looked up the value of the PDF at these percentiles, and this should be the isoline that I want.
Of course this should also work in the polar stereographic (or any other) projection.
Here is a little example with two gamma-distributed data sets in a crossplot:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.interpolate import LinearNDInterpolator, RegularGridInterpolator
#generate some data
# Sample and grid sizes are ints: current NumPy rejects float sizes such as
# 1e4 / 1e2 for both random sampling and linspace.
n_samples = 10000
n_grid = 100
x = np.random.gamma(10, 0.8, n_samples)
y = np.random.gamma(4, 0.3, n_samples)

#set up the data and grid for the 2D PDF
values = np.vstack([x, y])
pdf_x = np.linspace(x.min(), x.max(), n_grid)
pdf_y = np.linspace(y.min(), y.max(), n_grid)
X, Y = np.meshgrid(pdf_x, pdf_y)
kernel = stats.gaussian_kde(values)

#evaluate the PDF at every grid location
positions = np.vstack([X.ravel(), Y.ravel()])
Z = np.reshape(kernel(positions).T, X.shape)

#upper and lower quartiles of x and y data
xql = np.percentile(x, 25)
xqu = np.percentile(x, 75)
yql = np.percentile(y, 25)
yqu = np.percentile(y, 75)

#set up the interpolator - I could also use RegularGridInterpolator - should be faster
Interp = LinearNDInterpolator((X.flatten(), Y.flatten()), Z.flatten())

#1D example to illustrate what I mean
plt.figure()
kernel2 = stats.gaussian_kde(x)
# ``density=True`` is the replacement for the removed ``normed=True``.
plt.hist(x, 30, density=True)
plt.plot(pdf_x, kernel2(pdf_x), 'r--', linewidth=2)
#plot vertical lines at the upper and lower quartiles
plt.vlines(xql, 0, 0.2, color='red')
plt.vlines(xqu, 0, 0.2, color='red')

#Scatterplot / Crossplot with PDF and 25 and 75% isolines
plt.figure()
plt.scatter(x, y)
#search for the isolines defining the upper and lower quartiles
#(the author's intent: the lower-quartile isoline should encircle 75% of the data)
# contour() requires increasing levels, and which quartile point has the
# higher density depends on the data, so the two values are sorted.
levels = sorted(
    np.asarray(Interp(qx, qy)).item()
    for qx, qy in ((xql, yql), (xqu, yqu))
)
plt.contour(X, Y, Z, levels=levels, colors='orange')
plt.show()
To finish up, I will give a quick example of what it looks like in a polar stereographic projection:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.interpolate import LinearNDInterpolator
from mpl_toolkits.basemap import Basemap
#set up the coordinate projection
m = Basemap(projection='spaeqd', boundinglat=0, lon_0=180,
            resolution='l', round=True, suppress_ticks=True)
parallelGrid = np.arange(-80., 1., 10.)
meridianGrid = np.arange(-180.0, 180.1, 30)
m.drawparallels(parallelGrid, labels=[False, False, False, False])
m.drawmeridians(meridianGrid, labels=[False, False, False, False],
                labelstyle='+/-', fmt='%i')

#Found this on stackoverflow - labels it exactly how I want it
ax = plt.gca()
ax.text(0.5, 1.025, 'N', transform=ax.transAxes,
        horizontalalignment='center', verticalalignment='bottom', size=25)
for para in np.arange(30, 360, 30):
    # Label position just outside the circular map, in axes coordinates.
    # Separate names keep the loop from clobbering the data x / y below.
    label_x = (1.1*0.5*np.sin(np.deg2rad(para)))+0.5
    label_y = (1.1*0.5*np.cos(np.deg2rad(para)))+0.5
    ax.text(label_x, label_y, u'%i\N{DEGREE SIGN}'%para, transform=ax.transAxes,
            horizontalalignment='center', verticalalignment='center')

#generate some data
x = np.random.randint(180, 225, size=15)
y = np.random.randint(30, 40, size=15)
#into projection (second coordinate negated for the southern hemisphere)
x, y = m(x, -y)
values = np.vstack([x, y])

pdf_x = np.arange(0, 361, 1)
pdf_y = np.arange(0, 91, 1)
#into projection
X, Y = np.meshgrid(pdf_x, pdf_y)
X, Y = m(X, -Y)

#evaluate the KDE of the projected points on the projected grid
kernel = stats.gaussian_kde(values)
positions = np.vstack([X.ravel(), Y.ravel()])
Z = np.reshape(kernel(positions).T, X.shape)

#upper and lower quartiles of the projected data
xql = np.percentile(x, 25)
xqu = np.percentile(x, 75)
yql = np.percentile(y, 25)
yqu = np.percentile(y, 75)

Interp = LinearNDInterpolator((X.flatten(), Y.flatten()), Z.flatten())

ax.scatter(x, y)
# contour() requires increasing levels, and which quartile point has the
# higher density depends on the data, so the two values are sorted.
levels = sorted(
    np.asarray(Interp(qx, qy)).item()
    for qx, qy in ((xql, yql), (xqu, yqu))
)
ax.contour(X, Y, Z, levels=levels, colors='red')
plt.show()