Test for comparing multi-dimensional distributions in python - python

I have the following datasets:
import random
import pandas as pd
A = pd.DataFrame({'x':[random.uniform(0, 1) for i in range(0,100)], 'y':[random.uniform(0, 1) for i in range(0,100)]})
B = pd.DataFrame({'x':[random.uniform(0, 1) for i in range(0,100)], 'y':[random.uniform(0, 1) for i in range(0,100)]})
From these two datasets, I can produce the following plots
import matplotlib.pyplot as plt
import scipy.stats as st
def plot_2d_kde(df):
# Extract x and y
x = df['x']
y = df['y']
# Define the borders
deltaX = (max(x) - min(x))/10
deltaY = (max(y) - min(y))/10
xmin = min(x) - deltaX
xmax = max(x) + deltaX
ymin = min(y) - deltaY
ymax = max(y) + deltaY
# Create meshgrid
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
# We will fit a gaussian kernel using the scipy’s gaussian_kde method
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values)
f = np.reshape(kernel(positions).T, xx.shape)
fig = plt.figure(figsize=(13, 7))
ax = plt.axes(projection='3d')
surf = ax.plot_surface(xx, yy, f, rstride=1, cstride=1, cmap='coolwarm', edgecolor='none')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('PDF')
ax.set_title('Surface plot of Gaussian 2D KDE')
fig.colorbar(surf, shrink=0.5, aspect=5) # add color bar indicating the PDF
ax.view_init(60, 35)
plot_2d_kde(A)
plot_2d_kde(B)
Is there a way to test if the multidimensional PDF of A differs from the one of B in python?

Related

Scipy RegularGridInterpolator turns interpolated vector field

The task:
I am trying to interpolate a vector field on a regular grid, i.e.:
The issue:
I am using the RegularGridInterpolator from scipy to do this. However, it seems that the resulting vector field is turned with respect to the original:
Anyone knows why?
Python code to reproduce example:
from scipy.interpolate import RegularGridInterpolator
import matplotlib.pyplot as plt
import numpy as np
# ORIGINAL
# Number of points (NxN)
N = 50
# Boundaries
ymin = -2.; ymax = 2.
xmin = -2.; xmax = 2.
# Create Meshgrid
x = np.linspace(xmin,xmax, N)
y = np.linspace(ymin,ymax, N)
xx, yy = np.meshgrid(x, y)
# Vector Field
Fx = np.cos(xx + 2*yy)
Fy = np.sin(xx - 2*yy)
# Plot vector field
fig, ax = plt.subplots()
ax.quiver(x, y, Fx, Fy)
plt.title("Original")
plt.show()
# REDUCED
# Number of points (NxN)
N = 10
# Boundaries
ymin = -2.; ymax = 2.
xmin = -2.; xmax = 2.
# Create Meshgrid
x = np.linspace(xmin,xmax, N)
y = np.linspace(ymin,ymax, N)
xx, yy = np.meshgrid(x, y)
# Vector Field
Fx = np.cos(xx + 2*yy)
Fy = np.sin(xx - 2*yy)
# Plot vector field
fig, ax = plt.subplots()
ax.quiver(x, y, Fx, Fy)
plt.title("Reduced")
plt.show()
# INTERPOLATED VERSION BASED ON REDUCED
# Iterpolate
my_interpolating_function_x = RegularGridInterpolator((x, y), Fx)
my_interpolating_function_y = RegularGridInterpolator((x, y), Fy)
# Create Meshgrid
N = 50
x = np.linspace(xmin,xmax, N)
y = np.linspace(ymin,ymax, N)
grid = np.meshgrid(x, y)
new_points = np.vstack(list(map(np.ravel, grid))).T
# Interpolate
F_x_inter = my_interpolating_function_x(new_points)
F_y_inter = my_interpolating_function_y(new_points)
# reshape
F_x_inter = np.reshape(F_x_inter,(50,50))
F_y_inter = np.reshape(F_y_inter,(50,50))
#plot
fig, ax = plt.subplots()
ax.quiver(x, y, F_x_inter, F_y_inter)
plt.title("Interpolated")
plt.show()

Get distinct boundaries in matplolib contourf

I am trying to linearly interpolate values using scipy of sets of coordinates, thereafter plotting in matplotlib. How can I achieve the distinct boundaries between each region?
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
np.random.seed(42)
from scipy.interpolate import griddata
x = np.random.random(20)
y = np.random.random(20)
z = np.random.random(20)
meshSize = 50
extensionFact = 10
xi, yi, = np.meshgrid(
np.linspace(np.min(x) - np.average(x) / extensionFact, np.max(x) + np.average(x) / extensionFact, meshSize),
np.linspace(np.min(y) - np.average(y) / extensionFact, np.max(y) + np.average(y) / extensionFact, meshSize))
zi = griddata((x, y), z, (xi, yi), method='nearest')
fig = plt.figure(figsize=(8, 6))
ax1 = fig.add_subplot(111)
bounds1 = np.linspace(np.nanmin(zi), np.nanmax(zi), 11)
colors1 = plt.get_cmap('jet')(np.linspace(0, 1, len(bounds1) + 1))
cmap1 = mcolors.ListedColormap(colors1[1:-1])
norm1 = mcolors.BoundaryNorm(boundaries=bounds1, ncolors=len(bounds1) - 1)
im1 = ax1.contourf(xi, yi, zi, levels=bounds1, cmap=cmap1, alpha=1)
fig.colorbar(im1, orientation='vertical', shrink=1, aspect=30, pad=0.03, ticks=bounds1)
plt.scatter(x, y, marker='x', c='k')
plt.show()
Presently the intersection of two regions are blurred.

3D plot of the CONE using matplotlib

I'm looking for help to draw a 3D cone using matplotlib.
My goal is to draw a HSL cone, then base on the vertex coordinats i will select the color.
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
theta1 = np.linspace(0, 2*np.pi, 100)
r1 = np.linspace(-2, 0, 100)
t1, R1 = np.meshgrid(theta1, r1)
X1 = R1*np.cos(t1)
Y1 = R1*np.sin(t1)
Z1 = 5+R1*2.5
theta2 = np.linspace(0, 2*np.pi, 100)
r2 = np.linspace(0, 2, 100)
t2, R2 = np.meshgrid(theta2, r2)
X2 = R2*np.cos(t2)
Y2 = R2*np.sin(t2)
Z2 = -5+R2*2.5
ax.set_xlabel('x axis')
ax.set_ylabel('y axis')
ax.set_zlabel('z axis')
# ax.set_xlim(-2.5, 2.5)
# ax.set_ylim(-2.5, 2.5)
# ax.set_zlim(0, 5)
ax.set_aspect('equal')
ax.plot_surface(X1, Y1, Z1, alpha=0.8, color="blue")
ax.plot_surface(X2, Y2, Z2, alpha=0.8, color="blue")
# ax.plot_surface(X, Y, Z, alpha=0.8)
#fig. savefig ("Cone.png", dpi=100, transparent = False)
plt.show()
HSL CONE
My cone
So my question now is how to define color of each element.
i have found a solution, maybe it will be usefull for others.
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np
import colorsys
from matplotlib.tri import Triangulation
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
n_angles = 80
n_radii = 20
# An array of radii
# Does not include radius r=0, this is to eliminate duplicate points
radii = np.linspace(0.0, 0.5, n_radii)
# An array of angles
angles = np.linspace(0, 2*np.pi, n_angles, endpoint=False)
# Repeat all angles for each radius
angles = np.repeat(angles[..., np.newaxis], n_radii, axis=1)
# Convert polar (radii, angles) coords to cartesian (x, y) coords
# (0, 0) is added here. There are no duplicate points in the (x, y) plane
x = np.append(0, (radii*np.cos(angles)).flatten())
y = np.append(0, (radii*np.sin(angles)).flatten())
# Pringle surface
z = 1+-np.sqrt(x**2+y**2)*2
print(x.shape, y.shape, angles.shape, radii.shape, z.shape)
# NOTE: This assumes that there is a nice projection of the surface into the x/y-plane!
tri = Triangulation(x, y)
triangle_vertices = np.array([np.array([[x[T[0]], y[T[0]], z[T[0]]],
[x[T[1]], y[T[1]], z[T[1]]],
[x[T[2]], y[T[2]], z[T[2]]]]) for T in tri.triangles])
x2 = np.append(0, (radii*np.cos(angles)).flatten())
y2 = np.append(0, (radii*np.sin(angles)).flatten())
# Pringle surface
z2 = -1+np.sqrt(x**2+y**2)*2
# NOTE: This assumes that there is a nice projection of the surface into the x/y-plane!
tri2 = Triangulation(x2, y2)
triangle_vertices2 = np.array([np.array([[x2[T[0]], y2[T[0]], z2[T[0]]],
[x2[T[1]], y2[T[1]], z2[T[1]]],
[x2[T[2]], y2[T[2]], z2[T[2]]]]) for T in tri2.triangles])
triangle_vertices = np.concatenate([triangle_vertices, triangle_vertices2])
midpoints = np.average(triangle_vertices, axis=1)
def find_color_for_point(pt):
c_x, c_y, c_z = pt
angle = np.arctan2(c_x, c_y)*180/np.pi
if (angle < 0):
angle = angle + 360
if c_z < 0:
l = 0.5 - abs(c_z)/2
#l=0
if c_z == 0:
l = 0.5
if c_z > 0:
l = (1 - (1-c_z)/2)
if c_z > 0.97:
l = (1 - (1-c_z)/2)
col = colorsys.hls_to_rgb(angle/360, l, 1)
return col
facecolors = [find_color_for_point(pt) for pt in midpoints] # smooth gradient
# facecolors = [np.random.random(3) for pt in midpoints] # random colors
coll = Poly3DCollection(
triangle_vertices, facecolors=facecolors, edgecolors=None)
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.add_collection(coll)
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.set_zlim(-1, 1)
ax.elev = 50
plt.show()
Inspired from Jake Vanderplas with Python Data Science Handbook, when you are drawing some 3-D plot whose base is a circle, it is likely that you would try:
# Actually not sure about the math here though:
u, v = np.mgrid[0:2*np.pi:100j, 0:np.pi:20j]
x = np.cos(u)*np.sin(v)
y = np.sin(u)*np.sin(v)
and then think about the z-axis. Since viewing from the z-axis the cone is just a circle, so the relationships between z and x and y is clear, which is simply: z = np.sqrt(x ** 2 + y ** 2). Then you can draw the cone based on the codes below:
from mpl_toolkits import mplot3d
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
def f(x, y):
return np.sqrt(x ** 2 + y ** 2)
fig = plt.figure()
ax = plt.axes(projection='3d')
# Can manipulate with 100j and 80j values to make your cone looks different
u, v = np.mgrid[0:2*np.pi:100j, 0:np.pi:80j]
x = np.cos(u)*np.sin(v)
y = np.sin(u)*np.sin(v)
z = f(x, y)
ax.plot_surface(x, y, z, cmap=cm.coolwarm)
# Some other effects you may want to try based on your needs:
# ax.plot_surface(x, y, -z, cmap=cm.coolwarm)
# ax.scatter3D(x, y, z, color="b")
# ax.plot_wireframe(x, y, z, color="b")
# ax.plot_wireframe(x, y, -z, color="r")
# Can set your view from different angles.
ax.view_init(azim=15, elev=15)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_zlabel("z")
plt.show()
ax.set_ylabel("y")
ax.set_zlabel("z")
plt.show()
And from my side, the cone looks like:
and hope it helps.

How to get build a consistent discrete colormap/colorbar with "upper" and "lower" values

An image is worth a thousand words :
https://www.harrisgeospatial.com/docs/html/images/colorbars.png
I want to obtain the same color bar than the one on the right with matplotlib.
Default behavior use the same color for "upper"/"lower" and adjacent cell...
Thank you for your help!
Here is the code I have:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
N = 100
X, Y = np.mgrid[-3:3:complex(0, N), -2:2:complex(0, N)]
Z1 = np.exp(-X**2 - Y**2)
Z2 = np.exp(-(X - 1)**2 - (Y - 1)**2)
Z = (Z1 - Z2) * 2
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
# even bounds gives a contour-like effect
bounds = np.linspace(-1, 1, 10)
norm = colors.BoundaryNorm(boundaries=bounds, ncolors=256)
pcm = ax.pcolormesh(X, Y, Z,
norm=norm,
cmap='RdBu_r')
fig.colorbar(pcm, ax=ax, extend='both', orientation='vertical')
In order to have the "over"/"under"-color of a colormap take the first/last color of that map but still be different from the last color inside the colormapped range you can get one more color from a colormap than you have boundaries in the BoundaryNorm and use the first and last color as the respective colors for the "over"/"under"-color.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
N = 100
X, Y = np.mgrid[-3:3:complex(0, N), -2:2:complex(0, N)]
Z1 = np.exp(-X**2 - Y**2)
Z2 = np.exp(-(X - 1)**2 - (Y - 1)**2)
Z = (Z1 - Z2) * 2
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
# even bounds gives a contour-like effect
bounds = np.linspace(-1, 1, 11)
# get one more color than bounds from colormap
colors = plt.get_cmap('RdBu_r')(np.linspace(0,1,len(bounds)+1))
# create colormap without the outmost colors
cmap = mcolors.ListedColormap(colors[1:-1])
# set upper/lower color
cmap.set_over(colors[-1])
cmap.set_under(colors[0])
# create norm from bounds
norm = mcolors.BoundaryNorm(boundaries=bounds, ncolors=len(bounds)-1)
pcm = ax.pcolormesh(X, Y, Z, norm=norm, cmap=cmap)
fig.colorbar(pcm, ax=ax, extend='both', orientation='vertical')
plt.show()
As suggested in my comment you can change the color map with
pcm = ax.pcolormesh(X, Y, Z, norm=norm, cmap='rainbow_r')
That gives:
You can define your own color map as shown here: Create own colormap using matplotlib and plot color scale

Plotting a masked surface plot using python, numpy and matplotlib

I'm plotting a surface using matplotlib 1.1.0.
The plot Z axis is masked like so:
Zm = ma.masked_where((abs(z_grid) < 1.09) & (abs(z_grid) > 0.91), (z_surface))
surf = ax.plot_surface(X, Y,Zm, rstride=2, cstride=2, cmap=colors,linewidth=0, antialiased=False)
But I'm not seeing the mask applied on the plot. I plotted the mask itself as a subplot
surf = ax.plot_surface(X, Y,ma.getmask(Zm), rstride=2, cstride=2, cmap=colors,linewidth=0, antialiased=False)
Which worked, so I know my mask does actually contain True values.
Full code:
from pylab import *
import matplotlib.pyplot as plt
from matplotlib.widgets import Button
import numpy
from mpl_toolkits.mplot3d.axes3d import Axes3D
from matplotlib import patches
from matplotlib.figure import Figure
from matplotlib import rcParams
fig = plt.figure(figsize=plt.figaspect(0.5))
ax = fig.add_subplot(1, 2, 1,projection='3d')
pole_positions_orig = [-0.6+0.73j];
zero_positions_orig = [0.29-0.41j];
surface_limit = 1.7;
min_val = -surface_limit;
max_val = surface_limit;
surface_resolution = 0.0333;
X = numpy.arange(min_val,max_val,surface_resolution)
Y = numpy.arange(min_val,max_val,surface_resolution)
X, Y = numpy.meshgrid(X, Y)
z_grid = X + Y*1j;
z_surface = z_grid*0;
pole_positions = numpy.round(pole_positions_orig,1) + surface_resolution/2+(surface_resolution/2)*1j;
zero_positions = numpy.round(zero_positions_orig,1) + surface_resolution/2 +(surface_resolution/2)*1j;
for k in range(0, len(zero_positions)):
z_surface = z_surface + 20*log10((z_grid - zero_positions[k].real - zero_positions[k].imag*1j));
z_surface = z_surface + 20*log10((z_grid - zero_positions[k].real + zero_positions[k].imag*1j));
for k in range(0, len(pole_positions)):
z_surface = z_surface - 20*log10((z_grid - pole_positions[k].real - pole_positions[k].imag*1j));
z_surface = z_surface - 20*log10((z_grid - pole_positions[k].real + pole_positions[k].imag*1j));
colors = cm.jet;
colors.set_bad('k');
Zm = ma.masked_where((abs(z_grid) < 1.09) & (abs(z_grid) > 0.91), (z_surface))
z_surface = Zm;
surf = ax.plot_surface(X, Y,z_surface, rstride=2, cstride=2, cmap=colors,linewidth=0, antialiased=False)
ticks = [-1, 1];
z_ticks = [-30,-20,-10,0,10,20,30];
ax.set_xticks(ticks);
ax.set_yticks(ticks);
ax.set_zticks(z_ticks);
ax.set_xlabel('Re')
ax.set_ylabel('Im')
ax.set_zlabel('Mag(db)',ha='left')
plt.setp(ax.get_zticklabels(), fontsize=7)
plt.setp(ax.get_xticklabels(), fontsize=7)
plt.setp(ax.get_yticklabels(), fontsize=7)
ax = fig.add_subplot(1, 2, 2,projection='3d')
surf = ax.plot_surface(X, Y,ma.getmask(z_surface), rstride=2, cstride=2, cmap=colors,linewidth=0, antialiased=False)
ax.grid(b=None);
show();
This is what I have:
This is what I want (from matlab):
What am I missing?
Fraxel mentioned that surface_plot doesn't support masking. In order to get around the issue, this is what I did:
I basically manually masked the z axis data by setting every masked value to numpy.nan like so:
Zm = ma.masked_where((abs(z_grid) < 1.02) & (abs(z_grid) > 0.98), (z_surface))
z_surface[where(ma.getmask(Zm)==True)] = numpy.nan
However, it messed up my colormap scaling. To fix that, I did this:
cmap = cm.jet
lev = numpy.arange(-30,30,1);
norml = colors.BoundaryNorm(lev, 256)
surf = ax.plot_surface(X, Y, z_surface,...,norm = norml)
Not 100% what I wanted, but a good compromise nonetheless.
You can do it, but you need to do it by manually colouring the surface faces yourself;
the cmap function takes a nubmer between 0 and 1, so we just need to normalise the values before calling the cmap function on them.
z_surface = numpy.real(z_surface)
min_z, max_z = z_surface.min(), z_surface.max()
colours = numpy.zeros_like(z_surface, dtype=object)
for i in range(len(z_surface)):
for j in range(len(z_surface[0])):
if 0.91 < numpy.sqrt(X[i,j]**2 + Y[i,j]**2) < 1.09:
colours[i,j] = "red"
else:
colours[i,j] = plt.get_cmap("jet")((z_surface[i,j]-min_z) / (max_z - min_z))
surf = ax.plot_surface(X, Y, z_surface, rstride=2, cstride=2, facecolors=colours, linewidth=0, antialiased=False)
I should also point out that matplotlib is casting your z array to real - whether or not you are taking advantage of this on purpose though i don't know.

Categories