Large matplotlib pixel figure best approach - python

I have a large 2D dataset in which I want to associate a color with each X,Y pair and plot the result with matplotlib. I am talking about 1,000,000 points. What is the best approach in terms of performance (speed), and could you point me to an example?

If you're dealing with a regular grid, just treat it as an image:
import numpy as np
import matplotlib.pyplot as plt
# Grid dimensions: one value per pixel.
nrows = ncols = 1000
# Draw nrows * ncols uniform samples, fold them into a 2D array and scale by 500.
flat_samples = np.random.random(nrows * ncols)
z = 500 * flat_samples.reshape((nrows, ncols))
# A regular grid is drawn directly as an image; 'nearest' keeps hard pixel edges.
plt.imshow(z, interpolation='nearest')
plt.colorbar()
plt.show()
If you have randomly ordered x,y,z triplets that make up a regular grid, then you'll need to grid them.
Essentially, you might have something like this:
import numpy as np
import matplotlib.pyplot as plt
# Generate some data
nrows, ncols = 1000, 1000
xmin, xmax = -32.4, 42.0
ymin, ymax = 78.9, 101.3
# Cell size of the grid: x runs along the columns, y along the rows,
# so dx is derived from ncols and dy from nrows.
dx = (xmax - xmin) / (ncols - 1)
dy = (ymax - ymin) / (nrows - 1)
x = np.linspace(xmin, xmax, ncols)
y = np.linspace(ymin, ymax, nrows)
x, y = np.meshgrid(x, y)
z = np.hypot(x - x.mean(), y - y.mean())
x, y, z = [item.flatten() for item in (x, y, z)]
# Scramble the order of the points so that we can't just simply reshape z
indicies = np.arange(x.size)
np.random.shuffle(indicies)
x, y, z = [item[indicies] for item in (x, y, z)]
# Up until now we've just been generating data...
# Now, x, y, and z probably represent something like you have.
# We need to make a regular grid out of our shuffled x, y, z values.
# To do this, we have to know the cellsize (dx & dy) that the grid is on and
# the number of rows and columns in the grid.
# First we convert our x and y positions to integer column/row indices.
# (The builtin int/float are used because the np.int / np.float aliases
# were removed from NumPy.)
idx = np.round((x - x.min()) / dx).astype(int)
idy = np.round((y - y.min()) / dy).astype(int)
# Then we make an empty 2D grid...
grid = np.zeros((nrows, ncols), dtype=float)
# Then we fill the grid with our values (rows are indexed by y, columns by x):
grid[idy, idx] = z
# And now we plot it:
plt.imshow(grid, interpolation='nearest',
           extent=(x.min(), x.max(), y.max(), y.min()))
plt.colorbar()
plt.show()

Related

Scipy RegularGridInterpolator turns interpolated vector field

The task:
I am trying to interpolate a vector field on a regular grid, i.e.:
The issue:
I am using the RegularGridInterpolator from scipy to do this. However, it seems that the resulting vector field is turned with respect to the original:
Does anyone know why?
Python code to reproduce example:
from scipy.interpolate import RegularGridInterpolator
import matplotlib.pyplot as plt
import numpy as np
# ORIGINAL
# Number of points (NxN)
N = 50
# Boundaries
ymin = -2.; ymax = 2.
xmin = -2.; xmax = 2.
# Create Meshgrid
x = np.linspace(xmin,xmax, N)
y = np.linspace(ymin,ymax, N)
# np.meshgrid is called without an `indexing` argument, i.e. with its default
# 'xy' layout: the first array axis follows y and the second follows x.
xx, yy = np.meshgrid(x, y)
# Vector Field
Fx = np.cos(xx + 2*yy)
Fy = np.sin(xx - 2*yy)
# Plot vector field
fig, ax = plt.subplots()
ax.quiver(x, y, Fx, Fy)
plt.title("Original")
plt.show()
# REDUCED
# Same field sampled on a coarser grid; this is the input to the interpolators.
# Number of points (NxN)
N = 10
# Boundaries
ymin = -2.; ymax = 2.
xmin = -2.; xmax = 2.
# Create Meshgrid
x = np.linspace(xmin,xmax, N)
y = np.linspace(ymin,ymax, N)
xx, yy = np.meshgrid(x, y)
# Vector Field
Fx = np.cos(xx + 2*yy)
Fy = np.sin(xx - 2*yy)
# Plot vector field
fig, ax = plt.subplots()
ax.quiver(x, y, Fx, Fy)
plt.title("Reduced")
plt.show()
# INTERPOLATED VERSION BASED ON REDUCED
# Interpolate
# NOTE(review): RegularGridInterpolator((x, y), values) expects values[i, j]
# to belong to the point (x[i], y[j]), whereas Fx and Fy were built from an
# 'xy'-indexed meshgrid and are laid out as [y, x]. This axis-order mismatch
# is the likely cause of the "turned" field. Building the grid with
# np.meshgrid(x, y, indexing='ij'), or passing Fx.T / Fy.T here, would make
# the two layouts agree -- confirm against the SciPy documentation.
my_interpolating_function_x = RegularGridInterpolator((x, y), Fx)
my_interpolating_function_y = RegularGridInterpolator((x, y), Fy)
# Create Meshgrid
N = 50
x = np.linspace(xmin,xmax, N)
y = np.linspace(ymin,ymax, N)
grid = np.meshgrid(x, y)
# Flatten both coordinate arrays and stack them into an (N*N, 2) array whose
# rows are (x, y) query points.
new_points = np.vstack(list(map(np.ravel, grid))).T
# Interpolate
F_x_inter = my_interpolating_function_x(new_points)
F_y_inter = my_interpolating_function_y(new_points)
# reshape the flat results back onto the 50x50 evaluation grid
F_x_inter = np.reshape(F_x_inter,(50,50))
F_y_inter = np.reshape(F_y_inter,(50,50))
#plot
fig, ax = plt.subplots()
ax.quiver(x, y, F_x_inter, F_y_inter)
plt.title("Interpolated")
plt.show()

How to create a numpy array filled with average value of a vector

I am not sure how to phrase my question any better. Basically, I have three lists of the same length, x, y and z, and I want to fill a 2D numpy array in the x/y plane with the average of the associated z values.
Here is how I can achieve what I want to do:
import numpy as np
import matplotlib.pyplot as plt
x = [37.59390426045407, 38.00530354847739, 38.28412244348653, 38.74871247986305, 38.73175910429809, 38.869008864244016, 39.188234404976555, 39.92835838352555, 40.881394113153334, 41.686136269465884]
y = [0.1305391767832006, 0.13764519613447768, 0.14573326951792354, 0.15090729309032114, 0.16355823707239897, 0.17327106424274763, 0.17749746339532224, 0.17310384614773594, 0.16545780437882962, 0.1604752704890856]
z = [0.05738534353865021, 0.012572155256903583, -0.021709582561809437, -0.11191337750722108, -0.07931921785775153, -0.06241610118871843, 0.014216349927058225, 0.042002641153291886, -0.029354425271534645, 0.061894011359833856]
# Number of cells along each axis; cells without any point stay at 0.
n = 5
image = np.zeros(shape=(n, n))
# Fill the 2D array
x0 = min(x)
y0 = min(y)
dx = (max(x) - min(x)) / n
dy = (max(y) - min(y)) / n
# Loop over each 2D cell
for index_x in range(n):
    # find the x limits of the cell
    x1 = x0 + index_x * dx
    x2 = x0 + (index_x + 1) * dx
    # The last cell is closed on its upper side so that the point(s) sitting
    # exactly on max(x) are not lost to the half-open [x1, x2) test.
    last_x = index_x == n - 1
    for index_y in range(n):
        # find the y limits of the cell
        y1 = y0 + index_y * dy
        y2 = y0 + (index_y + 1) * dy
        last_y = index_y == n - 1
        # find the points of z that lie within the range of the cell
        vec_z = [
            zi
            for xi, yi, zi in zip(x, y, z)
            if xi >= x1 and (xi < x2 or last_x)
            and yi >= y1 and (yi < y2 or last_y)
        ]
        if vec_z:
            # first array axis follows x, second follows y
            image[index_x, index_y] = np.mean(vec_z)
# In the end, used to create a surface plot
# `image` is indexed [index_x, index_y]; imshow draws the first array axis
# vertically, so x runs down the picture and y runs across it.
fig, ax = plt.subplots()
ax.imshow(image, cmap=plt.cm.gray, interpolation='nearest')
plt.show()
Is there an easier way to achieve this? I imagine there is a numpy method for that.
If I understand correctly what you want to do, maybe a 2D interpolation from scipy.interpolate.interp2d is what you are looking for.
You define the interpolation function of your points:
# Build an interpolating function f(x, y) from the scattered samples.
# Requires `from scipy.interpolate import interp2d`.
# NOTE(review): interp2d was deprecated in SciPy 1.10 and removed in SciPy 1.14,
# so this line only works on older SciPy releases -- confirm the installed version.
f = interp2d(x = x, y = y, z = z)
Then you define the X and Y meshgrid:
# Resolution of the evaluation grid along each axis.
N = 50
# Evenly spaced coordinates spanning the sampled x and y ranges.
x_lo, x_hi = np.min(x), np.max(x)
y_lo, y_hi = np.min(y), np.max(y)
x_axis = np.linspace(x_lo, x_hi, N)
y_axis = np.linspace(y_lo, y_hi, N)
# 2D coordinate arrays covering every (x_axis, y_axis) combination.
X, Y = np.meshgrid(x_axis, y_axis)
Finally you can compute Z interpolated values on the meshgrid:
# Evaluate the interpolating function f at every node of the (X, Y) meshgrid.
Z = np.zeros((N, N))
for i in range(N):
    for j in range(N):
        Z[i, j] = f(X[i, j], Y[i, j])
If you plot in 3D the interpolated surface, you get:
# Draw the interpolated values Z over the (X, Y) meshgrid as a 3D surface.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.plot_surface(X, Y, Z, cmap='jet', shade=False)
# Label all three axes in one call.
ax.set(xlabel='x', ylabel='y', zlabel='z')
plt.show()
Interpolated surface compared to interpolation data points:
# Overlay the original (x, y, z) samples as black markers on the axes `ax`.
ax.scatter(x, y, z, color = 'black', s = 100, alpha = 1)

Filter out a circle boundary in matplotlib python

I am trying to plot the electric field generated when a spherical conductor is placed in a homogeneous electric field. I based my code on this question, using the np.where function to filter out the unit circle. But instead, the entire field in the band -1 < x < 1 is filtered out.
My code is shown below, how can I fix this?
import sys
import numpy as np
import matplotlib.pyplot as plt
import scipy.constants as spc
def E(x, y):
    """Return the field components (E_x, E_y) at position (x, y).

    Field due to a spherical conductor set in a homogeneous electric field.
    ``x`` and ``y`` may be scalars or arrays that combine elementwise; the
    result is a tuple of two values of the same kind. The expressions divide
    by powers of ``x**2 + y**2``, so they are undefined at the origin.
    """
    # Squared distance from the origin, shared by every term below.
    r2 = x**2 + y**2
    E_x = 3 * x * y / (r2**2.5)
    E_y = 3 * y * y / (r2**2.5) - 1 / (r2**1.5) + 1
    return E_x, E_y
# Grid of x, y points
n= 100
a=2
y = np.linspace(-a, a, n)
x = np.linspace(-a, a, n)
X, Y = np.meshgrid(x, y)
# NOTE: x and y are the 1D axis vectors here, so r (and `circle`) is 1D with
# n entries. In the np.where calls below it is combined with the 2D field
# arrays, so whole columns are zeroed rather than a disc. The 2D X and Y
# arrays would give a radius per grid point.
r=(x**2+y**2)**0.5
circle = r>=1
Ex, Ey = E(X,Y)
# Keep the field where `circle` is True and replace it with 0 elsewhere.
Ex, Ey = np.where(circle,Ex,0), np.where(circle,Ey,0)
#plot
fig = plt.figure()
ax = fig.add_subplot(111)
# Colour the streamlines by the log of the field magnitude (the log of the
# zeroed entries is -inf).
color = 2 * np.log(np.hypot(Ex, Ey))
ax.streamplot(x, y, Ex, Ey, color=color, linewidth=1, cmap=plt.cm.inferno,
density=1, arrowstyle='->', arrowsize=1.5)
ax.set_ylabel('$y$')
ax.set_xlabel('$x$')
ax.set_ylim(-a,a)
ax.set_xlim(-a,a)
ax.set_aspect('equal')
plt.show()
When you create your r array to use in the circle filter, you are using the 1D x and y arrays, when you should be using the 2D X and Y arrays.
Change that line from
r = (x**2 + y**2)**0.5
to
r = (X**2 + Y**2)**0.5
and it works as expected

Multiple colors in the one graph in Matplotlib

Is there a way to change the color of the graph about a certain threshold in python Matplotlib ?
# Plot the data series `temp` against its index.
plt.plot(temp)
# Solid blue horizontal line at y = 100 from x = 0 to x = len(temp).
plt.plot((0, len(temp)), (100, 100), 'b-')
plt.ylabel('Some data')
plt.show()
where temp contains some data
The final image looks something like this:
Now is it possible to display the data above this line ( 100 in this case) in some other color ?
You can use masked array to draw multiple lines. Here is an example:
Find the intersection points between curve and the threshold line, and insert the points to the original data. Then you can call plot() twice with masked array:
import numpy as np
import pylab as pl
def threshold_plot(x, y, th, fmt_lo, fmt_hi):
    """Plot y against x, drawing the parts above and below ``th`` differently.

    The points where the curve crosses ``th`` are found by linear
    interpolation and inserted into the data, so that both coloured pieces
    meet exactly on the threshold line.

    Parameters:
        x, y: 1D sequences of equal length (lists or arrays).
        th: threshold value on the y axis.
        fmt_lo: matplotlib format string for the part at or below ``th``.
        fmt_hi: matplotlib format string for the part at or above ``th``.
    """
    # Work on float arrays: this accepts plain lists, and it stops np.insert
    # from truncating the interpolated crossings when the input is integer.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Indices i where y[i] and y[i + 1] lie on opposite sides of the threshold.
    idx = np.where(np.diff(y > th))[0]
    # Linear interpolation of the crossing position inside each such segment.
    x_insert = x[idx] + (th - y[idx]) / (y[idx + 1] - y[idx]) * (x[idx + 1] - x[idx])
    y_insert = np.full_like(x_insert, th)
    xn = np.insert(x, idx + 1, x_insert)
    yn = np.insert(y, idx + 1, y_insert)
    # Upper part: hide everything strictly below the threshold.
    mask = yn < th
    pl.plot(np.ma.masked_where(mask, xn), np.ma.masked_where(mask, yn), fmt_hi, lw=2)
    # Lower part: hide everything strictly above the threshold.
    mask = yn > th
    pl.plot(np.ma.masked_where(mask, xn), np.ma.masked_where(mask, yn), fmt_lo)
    pl.axhline(th, color="black", linestyle="--")
# Demo: 50 uniform random samples over [0, 3*pi], split at the 0.7 threshold
# ("b" format for the lower part, "r" for the upper part).
x = np.linspace(0, 3 * np.pi, num=50)
y = np.random.rand(x.size)
threshold_plot(x, y, th=0.7, fmt_lo="b", fmt_hi="r")
the result:

imshow and histogram2d: can't get them to work

I'm learning Python and this is my first question here. I've read other topics related to the usage of imshow but didn't find anything useful. Sorry for my bad English.
I have plotted a set of points here, left graphic:
points (left) and image (right)
Now I'd like to see an image of the density of points, so I used imshow and histogram2d, and I got the image to the right in the previous link.
The image doesn't correspond to the distribution of points. How is this possible? I've followed the instructions in the help and even changed some parameters but nothing worked :(
The code is:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# Read columns 2, 4 and 6 of the data file, one array per column.
j, h, k = np.loadtxt("test.dat", usecols=(2, 4, 6), unpack=True)
# axis limits
xmin = -0.5
xmax = 3.0
ymin = -0.5
ymax = 4.0
# colours
j_h = j - h
h_k = h - k
# not all stars are plotted: only those with h_k < (j_h - c) / b are kept,
# where y = b * x + c is the line of slope b through (x1, y1)
x1 = 0.5
y1 = 0.5
b = 2.2
c = y1 - b * x1
# A boolean mask selects the points in one pass instead of growing the
# arrays with np.append one element at a time.
keep = np.atleast_1d(h_k < (j_h - c) / b)
x = np.atleast_1d(h_k)[keep]
y = np.atleast_1d(j_h)[keep]
# plot
fig = plt.figure(figsize=(8, 7))
ax = fig.add_subplot(111)
#ax.plot(x, y, "go")
ax.set_xlabel(r"X", fontsize=14)
ax.set_ylabel(r"Y", fontsize=14)
ax.axis([xmin, xmax, ymin, ymax])
# image
# Histogram range and bin counts: bins of width 0.05 along both axes.
rango = [[xmin, xmax], [ymin, ymax]]
binsx = int((xmax - xmin) / 0.05)
binsy = int((ymax - ymin) / 0.05)
binsxy = [binsx, binsy]
# NOTE(review): np.histogram2d returns H with x along the first axis and y
# along the second, while imshow draws the first axis vertically starting at
# the top. H is passed to imshow as-is, and `extent` lists the y edges first
# although imshow reads it as (left, right, bottom, top). This looks like the
# reason the image does not match the scatter of points -- confirm against
# the NumPy and matplotlib documentation.
H, xedges, yedges = np.histogram2d(x, y, range=rango, bins=binsxy)
extent = [yedges[0], yedges[-1], xedges[0], xedges[-1]]
cp = ax.imshow(H, interpolation='bilinear', extent=extent, cmap=cm.jet)
fig.colorbar(cp)
plt.show()
The links for the data used is here:
https://dl.dropbox.com/u/10411539/python/test.dat
Any help is appreciated!
Try different interpolation, and transpose the matrix to get it in the same axis:
# H from np.histogram2d(x, y) has x along its first axis, so transpose it to
# put y on the rows and flip the rows because imshow draws row 0 at the top.
# The extent is spelled out as (left, right, bottom, top) = (x range, y range)
# instead of reusing the earlier `extent` list, which lists the y edges first.
cp = ax.imshow(H.transpose()[::-1], interpolation='nearest',
               extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
               cmap=cm.jet)
Is this what you want to get? You can use pcolor (and pcolormesh) if you want to pass the x and y coordinates.
import urllib
import numpy as np
import matplotlib.pyplot as plt
# In Python 3, urlopen lives in urllib.request (urllib.urlopen is Python 2 only).
from urllib.request import urlopen

# Download the data file and read columns 2, 4 and 6; the response is closed
# as soon as the values have been parsed.
with urlopen('https://dl.dropbox.com/u/10411539/python/test.dat') as f:
    j, h, k = np.loadtxt(f, usecols=(2, 4, 6), unpack=True)
j_h = j - h
h_k = h - k
# H[i, j] counts the points in j_h bin i (edges in xedges) and h_k bin j
# (edges in yedges).
H, xedges, yedges = np.histogram2d(j_h, h_k, bins=100)
# pcolor(X, Y, C) maps the columns of C to X and the rows to Y, so the h_k
# edges (columns of H) go first and the j_h edges (rows of H) second.
plt.pcolor(yedges, xedges, H)
For imshow you have to reverse the first dimension, because imshow maps the array's zero-based row and column indices onto y and x and draws the rows from the top down.
# Rows of H follow the first histogram variable (edges in xedges) and columns
# follow the second (edges in yedges). Reversing the rows puts the lowest bin
# at the bottom; the extent (left, right, bottom, top) is taken from the bin
# edges instead of being typed in by hand.
plt.imshow(H[::-1, :], extent=(yedges[0], yedges[-1], xedges[0], xedges[-1]))

Categories