interpolate.griddata uses only one core - python

I am interpolating a 2d numpy array to fill missing values that are marked with NaN. The following code works but only uses one core.
Are there any better functions that I can use to utilize all of the 24 cores that I have?
x = np.arange(0, array.shape[1])
y = np.arange(0, array.shape[0])
#mask invalid values
array = np.ma.masked_invalid(array)
xx, yy = np.meshgrid(x, y)
#get only the valid values
x1 = xx[~array.mask]
y1 = yy[~array.mask]
newarr = array[~array.mask]
GD1 = interpolate.griddata((x1, y1), newarr.ravel(),
(xx, yy),
method='cubic')

I think that you can do this with dask. I am not too familiar with dask but here is a start:
import numpy as np
from scipy import interpolate
import dask.array as da
import matplotlib.pyplot as plt
from dask import delayed
# create data with random missing entries
ar_size = 2000
chunk_size = 500
z_array = np.ones((ar_size, ar_size))
z_array[np.random.randint(0, ar_size-1, 50),
np.random.randint(0, ar_size-1, 50)]= np.nan
# XY coords
x = np.linspace(0, 3, z_array.shape[1])
y = np.linspace(0, 3, z_array.shape[0])
# gen sin wave for testing
z_array = z_array * np.sin(x)
# prove there are nans in the dataset
assert np.isnan(np.sum(z_array))
xx, yy = np.meshgrid(x, y)
print("global x.size: ", xx.size)
# make dask arrays
dask_xyz = da.from_array((xx, yy, z_array), chunks=(3, chunk_size, "auto"), name="dask_all")
dask_xx = dask_xyz[0,:,:]
dask_yy = dask_xyz[1,:,:]
dask_zz = dask_xyz[2,:,:]
# select only valid values
dask_valid_y1 = dask_yy[~da.isnan(dask_zz)]
dask_valid_x1 = dask_xx[~da.isnan(dask_zz)]
dask_newarr = dask_zz[~da.isnan(dask_zz)]
def gd_wrapped(x1, y1, newarr, xx, yy):
# note: linear and cubic griddata impl do not extrapolate
# and therefore fail near the boundaries... see RBF interp instead
print("local x.size: ", x1.size)
gd_zz = interpolate.griddata((x1, y1), newarr.ravel(),
(xx, yy),
method='nearest')
return gd_zz
def rbf_wrapped(x1, y1, newarr, xx, yy):
rbf_interpolant = interpolate.Rbf(x1, y1, newarr, function='linear')
return rbf_interpolant(xx, yy)
# interpolate
# gd_chunked = [delayed(rbf_wrapped)(x1, y1, newarr, xx, yy) for \
gd_chunked = [delayed(gd_wrapped)(x1, y1, newarr, xx, yy) for \
x1, y1, newarr, xx, yy \
in \
zip(dask_valid_x1.to_delayed().flatten(),
dask_valid_y1.to_delayed().flatten(),
dask_newarr.to_delayed().flatten(),
dask_xx.to_delayed().flatten(),
dask_yy.to_delayed().flatten())]
gd_out = delayed(da.concatenate)(gd_chunked, axis=0)
gd_out.visualize("dask_par.png")
gd1 = np.array(gd_out.compute())
print(gd1)
assert gd1.shape == (ar_size, ar_size)
print(gd1.shape)
plt.figure()
plt.imshow(gd1)
plt.savefig("dask_par_sin.png")
# prove we have no more nans in the data
assert ~np.isnan(np.sum(gd1))
There are some issues with this implementation. Griddata cannot extrapolate so nans are an issue at chunk boundaries. You could probably solve this with some overlapping cells. As a stopgap solution you can use method='nearest' or try radial basis function interpolation.

Related

Interpolate between linear and nonlinear values

I have been able to interpolate values successfully from linear values of x to sine-like values of y.
However - I am struggling to interpolate the other way - from nonlinear values of y to linear values of x.
The below is a toy example
import matplotlib.pylab as plt
from scipy import interpolate
#create 100 x values
x = np.linspace(-np.pi, np.pi, 100)
#create 100 values of y where y= sin(x)
y=np.sin(x)
#learn function to map y from x
f = interpolate.interp1d(x, y)
With new values of linear x
xnew = np.array([-1,1])
I get correctly interpolated values of nonlinear y
ynew = f(xnew)
print(ynew)
array([-0.84114583, 0.84114583])
The problem comes when I try and interpolate values of x from y.
I create a new function, the reverse of f:
f2 = interpolate.interp1d(y,x,kind='cubic')
I put in values of y that I successfully interpolated before
ynew=np.array([-0.84114583, 0.84114583])
I am expecting to get the original values of x [-1, 1]
But I get:
array([-1.57328791, 1.57328791])
I have tried putting in other values for the 'kind' parameter with no luck and am not sure if I have got the wrong approach here. Thanks for your help
I guess the problem raises from the fact, that x is not a function of y, since for an arbitrary y value there may be more than one x value found.
Take a look at a truncated range of data.
When x ranges from 0 to np.pi/2, then for every y value there is a unique x value.
In this case the snippet below works as expected.
>>> import numpy as np
>>> from scipy import interpolate
>>> x = np.linspace(0, np.pi / 2, 100)
>>> y = np.sin(x)
>>> f = interpolate.interp1d(x, y)
>>> f([0, 0.1, 0.3, 0.5])
array([0. , 0.09983071, 0.29551713, 0.47941047])
>>> f2 = interpolate.interp1d(y, x)
>>> f2([0, 0.09983071, 0.29551713, 0.47941047])
array([0. , 0.1 , 0.3 , 0.50000001])
Maxim provided the reason for this behavior. This interpolation is a class designed to work for functions. In your case, y=arcsin(x) is only in a limited interval a function. This leads to interesting phenomena in the interpolation routine that interpolates to the nearest y-value which in the case of the arcsin() function is not necessarily the next value in the x-y curve but maybe several periods away. An illustration:
import numpy as np
import matplotlib.pylab as plt
from scipy import interpolate
xmin=-np.pi
xmax=np.pi
fig, axes = plt.subplots(3, 3, figsize=(15, 10))
for i, fac in enumerate([2, 1, 0.5]):
x = np.linspace(xmin * fac, xmax*fac, 100)
y=np.sin(x)
#x->y
f = interpolate.interp1d(x, y)
x_fit = np.linspace(xmin*fac, xmax*fac, 1000)
y_fit = f(x_fit)
axes[i][0].plot(x_fit, y_fit)
axes[i][0].set_ylabel(f"sin period {fac}")
if not i:
axes[i][0].set_title(label="interpolation x->y")
#y->x
f2 = interpolate.interp1d(y, x)
y2_fit = np.linspace(.99 * min(y), .99 * max(y), 1000)
x2_fit = f2(y2_fit)
axes[i][1].plot(x2_fit, y2_fit)
if not i:
axes[i][1].set_title(label="interpolation y->x")
#y->x with cubic interpolation
f3 = interpolate.interp1d(y, x, kind="cubic")
y3_fit = np.linspace(.99 * min(y), .99 * max(y), 1000)
x3_fit = f3(y3_fit)
axes[i][2].plot(x3_fit, y3_fit)
if not i:
axes[i][2].set_title(label="cubic interpolation y->x")
plt.show()
As you can see, the interpolation works along the ordered list of y-values (as you instructed it to), and this works particularly badly with cubic interpolation.

How can I make the contours of quadratic form using matplotlib?

I have the following quadratic form f(x) = x^T A x - b^T x and i've used numpy to define my matrices A, b:
A = np.array([[4,3], [3,7]])
b = np.array([3,-7])
So we're talking about 2 dimensions here, meaning that the contour plot will have the axes x1 and x2 and I want these to span from -4 to 4.
I've tried to experiment by doing
u = np.linspace(-4,4,100)
x, y = np.meshgrid(u,u)
in order to create the 2 axis x1 and x2 but then I dont know how to define my function f(x) and if I do plt.contour(x,y,f) it won't work because the function f(x) is defined with only x as an argument.
Any ideas would be greatly appreciated. Thanks !
EDIT : I managed to "solve" the problem by doing the operations between the quadratic form , for example x^T A x, and ended up with a function of x1,x2 where these are the components of x vector. After that I did
u = np.linspace(-4,4,100)
x, y = np.meshgrid(u,u)
z = 1.5*(x**2) + 3*(y**2) - 2*x + 8*y + 2*x*y #(thats the function i ended up with)
plt.contour(x, y, z)
If Your transformation matrices A, b look like
A = np.array([[4,3], [3,7]])
b = np.array([3,-7])
and Your data look like
u = np.linspace(-4,4,100)
x, y = np.meshgrid(u,u)
x.shape
x and y will have the shapes (100,100).
You can define f(x) as
def f(x):
return np.dot(np.dot(x.T,A),x) - np.dot(b,x)
to then input anything with the shape (2, N) into the function f.
I am unfortunately not sure, which values You want to feed into it.
But one example would be: [(-4:4), (-4:4)]
plt.contour(x, y, f(x[0:2,:]))
update
If the visualization of the contour plot does not fit Your purpose, You can use other plots, e.g. 3D visualizations.
from mpl_toolkits.mplot3d import Axes3D # This import has side effects required for the kwarg projection='3d' in the call to fig.add_subplot
fig = plt.figure(figsize=(40,20))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x,y, f(x[0:2,:]))
plt.show()
If You expect other values in the z-dimension, the projection f might be off.
For other 3d plots see: https://matplotlib.org/mpl_toolkits/mplot3d/tutorial.html
you could try something like this:
import numpy as np
import matplotlib.pyplot as plt
A = np.array([[4,3], [3,7]])
n_points = 100
u = np.linspace(-4, 4, n_points)
x, y = np.meshgrid(u, u)
X = np.vstack([x.flatten(), y.flatten()])
f_x = np.dot(np.dot(X.T, A), X)
f_x = np.diag(f_x).reshape(n_points, n_points)
plt.figure()
plt.contour(x, y, f_x)
Another alternative is to compute f_x as follows.
f_x = np.zeros((n_points, n_points))
for i in range(n_points):
for j in range(n_points):
in_v = np.array([[x[i][j]], [y[i][j]]])
f_x[i][j] = np.dot(np.dot(in_v.T, A), in_v)

2d interpolation with NaN values in python

I have a 2d matrix (1800*600) with many NaN values.
I would like to conduct a 2d interpolation, which is very simple in matlab.
But if scipy.interpolate.inter2d is used, the result is a NaN matrix. I know the NaN values could be filled using scipy.interpolate.griddata, but I don't want to fulfill the Nan. What other functions can I use to conduct a 2d interpolation?
A workaround using inter2d is to perform two interpolations: one on the filled data (replace the NaNs with an arbitrary value) and one to keep track of the undefined areas. It is then possible to re-assign NaN value to these areas:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy.interpolate import interp2d
# Generate some test data:
x = np.linspace(-2, 2, 40)
y = np.linspace(-2, 2, 41)
xx, yy = np.meshgrid(x, y)
z = xx**2+yy**2
z[ xx**2+yy**2<1 ] = np.nan
# Interpolation functions:
nan_map = np.zeros_like( z )
nan_map[ np.isnan(z) ] = 1
filled_z = z.copy()
filled_z[ np.isnan(z) ] = 0
f = interp2d(x, y, filled_z, kind='linear')
f_nan = interp2d(x, y, nan_map, kind='linear')
# Interpolation on new points:
xnew = np.linspace(-2, 2, 20)
ynew = np.linspace(-2, 2, 21)
z_new = f(xnew, ynew)
nan_new = f_nan( xnew, ynew )
z_new[ nan_new>0.5 ] = np.nan
plt.pcolor(xnew, ynew, z_new);

How can I make my 2D Gaussian fit to my image

I am trying to fit a 2D Gaussian to an image to find the location of the brightest point in it. My code looks like this:
import numpy as np
import astropy.io.fits as fits
import os
from astropy.stats import mad_std
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
from lmfit.models import GaussianModel
from astropy.modeling import models, fitting
def gaussian(xycoor,x0, y0, sigma, amp):
'''This Function is the Gaussian Function'''
x, y = xycoor # x and y taken from fit function. Stars at 0, increases by 1, goes to length of axis
A = 1 / (2*sigma**2)
eq = amp*np.exp(-A*((x-x0)**2 + (y-y0)**2)) #Gaussian
return eq
def fit(image):
med = np.median(image)
image = image-med
image = image[0,0,:,:]
max_index = np.where(image >= np.max(image))
x0 = max_index[1] #Middle of X axis
y0 = max_index[0] #Middle of Y axis
x = np.arange(0, image.shape[1], 1) #Stars at 0, increases by 1, goes to length of axis
y = np.arange(0, image.shape[0], 1) #Stars at 0, increases by 1, goes to length of axis
xx, yy = np.meshgrid(x, y) #creates a grid to plot the function over
sigma = np.std(image) #The standard dev given in the Gaussian
amp = np.max(image) #amplitude
guess = [x0, y0, sigma, amp] #The initial guess for the gaussian fitting
low = [0,0,0,0] #start of data array
#Upper Bounds x0: length of x axis, y0: length of y axis, st dev: max value in image, amplitude: 2x the max value
upper = [image.shape[0], image.shape[1], np.max(image), np.max(image)*2]
bounds = [low, upper]
params, pcov = curve_fit(gaussian, (xx.ravel(), yy.ravel()), image.ravel(),p0 = guess, bounds = bounds) #optimal fit. Not sure what pcov is.
return params
def plotting(image, params):
fig, ax = plt.subplots()
ax.imshow(image)
ax.scatter(params[0], params[1],s = 10, c = 'red', marker = 'x')
circle = Circle((params[0], params[1]), params[2], facecolor = 'none', edgecolor = 'red', linewidth = 1)
ax.add_patch(circle)
plt.show()
data = fits.getdata('AzTECC100.fits') #read in file
med = np.median(data)
data = data - med
data = data[0,0,:,:]
parameters = fit(data)
#generates a gaussian based on the parameters given
plotting(data, parameters)
The image is plotting and the code is giving no errors but the fitting isn't working. It's just putting an x wherever the x0 and y0 are. The pixel values in my image are very small. The max value is 0.0007 and std dev is 0.0001 and the x and y are a few orders of magnitude larger. So I believe my problem is that because of this my eq is going to zero everywhere so the curve_fit is failing. I'm wondering if there's a better way to construct my gaussian so that it plots correctly?
I do not have access to your image. Instead I have generated some test "image" as follows:
y, x = np.indices((51,51))
x -= 25
y -= 25
data = 3 * np.exp(-0.7 * ((x+2)**2 + (y-1)**2))
Also, I have modified your code for plotting to increase the radius of the circle by 10:
circle = Circle((params[0], params[1]), 10 * params[2], ...)
and I commented out two more lines:
# image = image[0,0,:,:]
# data = data[0,0,:,:]
The result that I get is shown in the attached image and it looks reasonable to me:
Could it be that the issue is in how you access data from the FITS file? (e.g., image = image[0,0,:,:]) Are the data 4D array? Why do you have 4 indices?
I also saw that you have asked a similar question here: Astropy.model 2DGaussian issue in which you tried to use just astropy.modeling. I will look into that question.
NOTE: you can replace code such as
max_index = np.where(image >= np.max(image))
x0 = max_index[1] #Middle of X axis
y0 = max_index[0] #Middle of Y axis
with
y0, x0 = np.unravel_index(np.argmax(data), data.shape)

Fitting 2D sum of gaussians, scipy.optimise.leastsq (Ans: Use curve_fit!)

I want to fit an 2D sum of gaussians to this data:
After failing at fitting a sum to this initially I instead sampled each peak separately (image) and returned a fit by find it's moments (essentially using this code).
Unfortunately, this results in an incorrect peak position measurement, due to the overlapping signal of the neighbouring peaks. Below is a plot of the sum of the separate fits. Obviously their peak all lean toward the centre. I need to account for this in order to return the correct peak position.
I've got working code which plots a 2D gaussian envelope function (twoD_Gaussian()), and I parse this through optimize.leastsq as a 1D array using numpy.ravel and an appropriate error function, however this results in a nonsense output.
I tried fitting a single peak within the sum and get the following erroneous output:
I'd appreciate any advice on what i could try to make this work, or alternative approaches if this isn't appropriate. All input welcomed of course!
Code below:
from scipy.optimize import leastsq
import numpy as np
import matplotlib.pyplot as plt
def twoD_Gaussian(amp0, x0, y0, amp1=13721, x1=356, y1=247, amp2=14753, x2=291, y2=339, sigma=40):
x0 = float(x0)
y0 = float(y0)
x1 = float(x1)
y1 = float(y1)
x2 = float(x2)
y2 = float(y2)
return lambda x, y: (amp0*np.exp(-(((x0-x)/sigma)**2+((y0-y)/sigma)**2)/2))+(
amp1*np.exp(-(((x1-x)/sigma)**2+((y1-y)/sigma)**2)/2))+(
amp2*np.exp(-(((x2-x)/sigma)**2+((y2-y)/sigma)**2)/2))
def fitgaussian2D(x, y, data, params):
"""Returns (height, x, y, width_x, width_y)
the gaussian parameters of a 2D distribution found by a fit"""
errorfunction = lambda p: np.ravel(twoD_Gaussian(*p)(*np.indices(np.shape(data))) - data)
p, success = optimize.leastsq(errorfunction, params)
return p
# Create data indices
I = image # Red channel of a scanned image, equivalent to the 1st image displayed in this post.
p = np.asarray(I).astype('float')
w,h = np.shape(I)
x, y = np.mgrid[0:h, 0:w]
xy = (x,y)
# scanned at 150 dpi = 5.91 dots per mm
dpmm = 5.905511811
plot_width = 40*dpmm
# create function indices
fdims = np.round(plot_width/2)
xdims = (RC[0] - fdims, RC[0] + fdims)
ydims = (RC[1] - fdims, RC[1] + fdims)
fx = np.linspace(xdims[0], xdims[1], np.round(plot_width))
fy = np.linspace(ydims[0], ydims[1], np.round(plot_width))
fx,fy = np.meshgrid(fx,fy)
#Crop image for display
crp_data = image[xdims[0]:xdims[1], ydims[0]:ydims[1]]
z = crp_data
# Parameters obtained from separate fits
Amplitudes = (13245, 13721, 15374)
px = (410, 356, 290)
py = (350, 247, 339)
initial_guess_sum = (Amp[0], px[0], py[0], Amp[1], px[1], py[1], Amp[2], px[2], py[2])
initial_guess_peak3 = (Amp[0], px[0], py[0]) # Try fitting single peak within sum
fitted_pars = fitgaussian2D(x, y, z, initial_guess_sum)
#fitted_pars = fitgaussian2D(x, y, z, initial_guess_peak3)
data_fitted= twoD_Gaussian(*fitted_pars)(fx,fy)
#data_fitted= twoD_Gaussian(*initial_guess_sum)(fx,fy)
fig = plt.figure(figsize=(10, 30))
ax = fig.add_subplot(111, aspect="equal")
#fig, ax = plt.subplots(1)
cb = ax.imshow(p, cmap=plt.cm.jet, origin='bottom',
extent=(x.min(), x.max(), y.min(), y.max()))
ax.contour(fx, fy, data_fitted.reshape(fx.shape[0], fy.shape[1]), 4, colors='w')
ax.set_xlim(np.int(RC[0])-135, np.int(RC[0])+135)
ax.set_ylim(np.int(RC[1])+135, np.int(RC[1])-135)
#plt.colorbar(cb)
plt.show()
I tried any number of other things before giving up and trying curve_fit again, albeit with more knowledge of parsing lambda functions. It worked. Example output and code below (still with redundancies) for the sake of posterity.
def twoD_Gaussian(amp0, x0, y0, amp1=13721, x1=356, y1=247, amp2=14753, x2=291, y2=339, sigma=40):
x0 = float(x0)
y0 = float(y0)
x1 = float(x1)
y1 = float(y1)
x2 = float(x2)
y2 = float(y2)
return lambda x, y: (amp0*np.exp(-(((x0-x)/sigma)**2+((y0-y)/sigma)**2)/2))+(
amp1*np.exp(-(((x1-x)/sigma)**2+((y1-y)/sigma)**2)/2))+(
amp2*np.exp(-(((x2-x)/sigma)**2+((y2-y)/sigma)**2)/2))
def twoD_GaussianCF(xy, amp0, x0, y0, amp1=13721, amp2=14753, x1=356, y1=247, x2=291, y2=339, sigma_x=12, sigma_y=12):
x0 = float(x0)
y0 = float(y0)
x1 = float(x1)
y1 = float(y1)
x2 = float(x2)
y2 = float(y2)
g = (amp0*np.exp(-(((x0-x)/sigma_x)**2+((y0-y)/sigma_y)**2)/2))+(
amp1*np.exp(-(((x1-x)/sigma_x)**2+((y1-y)/sigma_y)**2)/2))+(
amp2*np.exp(-(((x2-x)/sigma_x)**2+((y2-y)/sigma_y)**2)/2))
return g.ravel()
# Create data indices
I = image # Red channel of a scanned image, equivalent to the 1st image displayed in this post.
p = np.asarray(I).astype('float')
w,h = np.shape(I)
x, y = np.mgrid[0:h, 0:w]
xy = (x,y)
N_points = 3
display_width = 80
initial_guess_sum = (Amp[0], px[0], py[0], Amp[1], px[1], py[1], Amp[2], px[2], py[2])
popt, pcov = opt.curve_fit(twoD_GaussianCF, xy, np.ravel(p), p0=initial_guess_sum)
data_fitted= twoD_Gaussian(*popt)(x,y)
peaks = [(popt[1],popt[2]), (popt[5],popt[6]), (popt[7],popt[8])]
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, aspect="equal")
cb = ax.imshow(p, cmap=plt.cm.jet, origin='bottom',
extent=(x.min(), x.max(), y.min(), y.max()))
ax.contour(x, y, data_fitted.reshape(x.shape[0], y.shape[1]), 20, colors='w')
ax.set_xlim(np.int(RC[0])-135, np.int(RC[0])+135)
ax.set_ylim(np.int(RC[1])+135, np.int(RC[1])-135)
for k in range(0,N_points):
plt.plot(peaks[k][0],peaks[k][1],'bo',markersize=7)
plt.show()
If all you care about is the centroid of each gaussian, I would just go with scipy.optimize.minimize. Multiply your data by -1 and then do some coarse sampling to find minima. The height of each peak will be offset by the neighboring gaussians but the positions are unchanged, so if you find a local extreme value then that must be the centroid of a gaussian.
If you need the other parameters, it might make sense to find the centroids as I suggest and then use leastsq to find the amplitudes and widths. It might add a lot of overhead if you're running these fits many times, but it would significantly reduce the number of free parameters in the least squares fit.

Categories