Overlapping chunks in Xarray dataset for Kernel operations - python

I try to run a 9x9 pixel kernel across a large satellite image with a custom filter. One satellite scene has ~ 40 GB and to fit it into my RAM, I'm using xarrays options to chunk my dataset with dask.
My filter includes a check if the kernel is complete (i.e. not missing data at the edge of the image). In that case a NaN is returned to prevent a potential bias (and I don't really care about the edges). I now realized, that this introduces not only NaNs at the edges of the image (expected behaviour), but also along the edges of each chunk, because the chunks don't overlap. dask provides options to create chunks with an overlap, but are there any comparable capabilities in xarray? I found this issue, but it doesn't seem like there has been any progress in this regard.
Some sample code (shortened version of my original code):
import numpy as np
import numba
import math
import xarray as xr
#numba.jit("f4[:,:](f4[:,:],i4)", nopython = True)
def water_anomaly_filter(input_arr, window_size = 9):
# check if window size is odd
if window_size%2 == 0:
raise ValueError("Window size must be odd!")
# prepare an output array with NaNs and the same dtype as the input
output_arr = np.zeros_like(input_arr)
output_arr[:] = np.nan
# calculate how many pixels in x and y direction around the center pixel
# are in the kernel
pix_dist = math.floor(window_size/2-0.5)
# create a dummy weight matrix
weights = np.ones((window_size, window_size))
# get the shape of the input array
xn,yn = input_arr.shape
# iterate over the x axis
for x in range(xn):
# determine limits of the kernel in x direction
xmin = max(0, x - pix_dist)
xmax = min(xn, x + pix_dist+1)
# iterate over the y axis
for y in range(yn):
# determine limits of the kernel in y direction
ymin = max(0, y - pix_dist)
ymax = min(yn, y + pix_dist+1)
# extract data values inside the kernel
kernel = input_arr[xmin:xmax, ymin:ymax]
# if the kernel is complete (i.e. not at image edge...) and it
# is not all NaN
if kernel.shape == weights.shape and not np.isnan(kernel).all():
# apply the filter. In this example simply keep the original
# value
output_arr[x,y] = input_arr[x,y]
return output_arr
def run_water_anomaly_filter_xr(xds, var_prefix = "band",
window_size = 9):
variables = [x for x in list(xds.variables) if x.startswith(var_prefix)]
for var in variables[:2]:
xds[var].values = water_anomaly_filter(xds[var].values,
window_size = window_size)
return xds
def create_test_nc():
data = np.random.randn(1000, 1000).astype(np.float32)
rows = np.arange(54, 55, 0.001)
cols = np.arange(10, 11, 0.001)
ds = xr.Dataset(
data_vars=dict(
band_1=(["x", "y"], data)
),
coords=dict(
lon=(["x"], rows),
lat=(["y"], cols),
),
attrs=dict(description="Testdata"),
)
ds.to_netcdf("test.nc")
if __name__ == "__main__":
# if required, create test data
create_test_nc()
# import data
with xr.open_dataset("test.nc",
chunks = {"x": 50,
"y": 50},
) as xds:
xds_2 = xr.map_blocks(run_water_anomaly_filter_xr,
xds,
template = xds).compute()
xds_2["band_1"][:200,:200].plot()
This yields:
enter image description here
You can clearly see the rows and columns of NaNs along the edges of each chunk.
I'm happy for any suggestions. I would love to get the overlapping chunks (or any other solution) within xarray, but I'm also open for other solutions.

You can use Dask's map_blocks as follows:
arr = dask.array.map_overlap(
water_anomaly_filter, xds.band_1.data, dtype='f4', depth=4, window_size=9
).compute()
da = xr.DataArray(arr, dims=xds.band_1.dims, coords=xds.band_1.coords)
Note that you will likely want to tune depth and window_size for your specific application.

Related

Python + Image Processing: Efficiently Assign Pixel Values to Nearest Predefined Value

I implemented an algorithm that uses opencv kmeans to quantize the unique brightness values present in a greyscale image. Quantizing the unique values helped avoid biases towards image backgrounds which are typically all the same value.
However, I struggled to find a way to utilize this data to quantize a given input image.
I implemented a very naive solution, but it is unusably slow for the required input sizes (4000x4000):
for x in range(W):
for y in range(H):
center_id = np.argmin([(arr[y,x]-center)**2 for center in centers])
ret_labels2D[y,x] = sortorder.index(center_id)
ret_qimg[y,x] = centers[center_id]
Basically, I am simply adjusting each pixel to the predefined level with the minimum squared error.
Is there any way to do this faster? I was trying to process an image of size 4000x4000 and this implementation was completely unusable.
Full code:
def unique_quantize(arr, K, eps = 0.05, max_iter = 100, max_tries = 20):
"""#param arr: 2D numpy array of floats"""
H, W = arr.shape
unique_values = np.squeeze(np.unique(arr.copy()))
unique_values = np.array(unique_values, float)
if unique_values.ndim == 0:
unique_values = np.array([unique_values],float)
unique_values = np.ravel(unique_values)
unique_values = np.expand_dims(unique_values,1)
Z = unique_values.astype(np.float32)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER,max_iter,eps)
compactness, labels, centers = cv2.kmeans(Z,K,None,criteria,max_tries,cv2.KMEANS_RANDOM_CENTERS)
labels = np.ravel(np.squeeze(labels))
centers = np.ravel(np.squeeze(centers))
sortorder = list(np.argsort(centers)) # old index --> index to sortorder
ret_center = centers[sortorder]
ret_labels2D = np.zeros((H,W),int)
ret_qimg = np.zeros((H,W),float)
for x in range(W):
for y in range(H):
center_id = np.argmin([(arr[y,x]-center)**2 for center in centers])
ret_labels2D[y,x] = sortorder.index(center_id)
ret_qimg[y,x] = centers[center_id]
return ret_center, ret_labels2D, ret_qimg
EDIT: I looked at the input file again. The size was actually 12000x12000.
As your image is grayscale (presumably 8 bits), a lookup-table will be an efficient solution. It suffices to map all 256 gray-levels to the nearest center once for all, then use this as a conversion table. Even a 16 bits range (65536 entries) would be significantly accelerated.
I recently thought of a much better answer. This code is not extensively tested, but it worked for the use case in my project.
I made use of obscure fancy-indexing techniques in order to keep the entire algorithm contained within numpy functions.
def unique_quantize(arr, K, eps = 0.05, max_iter = 100, max_tries = 20):
"""#param arr: 2D numpy array of floats"""
H, W = arr.shape
unique_values = np.squeeze(np.unique(arr.copy()))
unique_values = np.array(unique_values, float)
if unique_values.ndim == 0:
unique_values = np.array([unique_values],float)
unique_values = np.ravel(unique_values)
unique_values = np.expand_dims(unique_values,1)
Z = unique_values.astype(np.float32)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER,max_iter,eps)
compactness, labels, centers = cv2.kmeans(Z,K,None,criteria,max_tries,cv2.KMEANS_RANDOM_CENTERS)
labels = np.ravel(np.squeeze(labels))
centers = np.ravel(np.squeeze(centers))
sortorder = np.argsort(centers) # old index --> index to sortorder
inverse_sortorder = np.array([list(sortorder).index(i) for i in range(len(centers))],int)
ret_center = centers[sortorder]
ret_labels2D = np.zeros((H,W),int)
ret_qimg = np.zeros((H,W),float)
errors = [np.power((arr-center),2) for center in centers]
errors = np.array(errors,float)
classification = np.squeeze(np.argmin(errors,axis=0))
ret_labels2D = inverse_sortorder[classification]
ret_qimg = centers[classification]
return np.array(ret_center,float), np.array(ret_labels2D,int), np.array(ret_qimg,float)

Try to work around the numpy.core._exceptions._ArrayMemoryError issue within my code

I have a data frame -> data with the shape (10000,257). I need to preprocess this dataframe so that I can use it in LSTM which requires a 3 dimensional input - (nrows,ntimesteps,nfeatures)I am working with the code snippet that is provided here:
def univariate_processing(variable, window):
import numpy as np
# create empty 2D matrix from variable
V = np.empty((len(variable)-window+1, window))
# take each row/time window
for i in range(V.shape[0]):
V[i,:] = variable[i : i+window]
V = V.astype(np.float32) # set common data type
return V
def RNN_regprep(df, y, len_input, len_pred): #, test_size):
# create 3D matrix for multivariate input
X = np.empty((df.shape[0]-len_input+1, len_input, df.shape[1]))
# Iterate univariate preprocessing on all variables - store them in XM
for i in range(df.shape[1]):
X[ : , : , i ] = univariate_processing(df[:,i], len_input)
# create 2D matrix of y sequences
y = y.reshape((-1,)) # reshape to 1D if needed
Y = univariate_processing(y, len_pred)
## Trim dataframes as explained
X = X[ :-(len_pred + 1) , : , : ]
Y = Y[len_input:-1 , :]
# Set common datatype
X = X.astype(np.float32)
Y = Y.astype(np.float32)
return X, Y
X,y = RNN_regprep(data,label, len_ipnut=200,len_pred=1)
While running this the following error is obtained:
numpy.core._exceptions._ArrayMemoryError: Unable to allocate 28.9 GiB for an array with shape (10000, 200, 257) and data type float64
I do understand that this is more of an issue with my memory within my server. I want to know any solution that I can change within my code to see if I can avoid this memory error or try reducing this memory consumption?
This is what windowed views are for. Using my recipe here:
var = np.random.rand(10000,257)
w = window_nd(var, 200, axis = 0)
Now you have a windowed view over var:
w.shape
Out[]: (9801, 200, 257)
But, importantly, it's using the exact same data as var, just looking into it in a windowed way:
w.__array_interface__['data'] #This is the memory's starting address
Out[]: (1448954720320, False)
var.__array_interface__['data']
Out[]: (1448954720320, False)
np.shares_memory(var, w)
Out[]: True
w.base.base.base is var #(lots of rearranging views in the background)
Out[]: True
So you can do:
def univariate_processing(variable, window):
return window_nd(variable, window, axis = 0)
That should significantly reduce your memory allocation, no "magic" required :)
You can also try
from skimage.util import view_as_windows
w = np.squeeze(view_as_windows(var, (200, 1)))
Which does almost the same thing. In this case: your answer would be:
def univariate_processing(variable, window):
from skimage.util import view_as_windows
window = (window,) + (1,)*(len(variable.shape)-1)
return np.squeeze(view_as_windows(variable, window))

Slicing 2D numpy array periodically

I have a numpy array of 300x300 where I want to keep all elements periodically. Specifically, for both axes I want to keep the first 5 elements, then discard 15, keep 5, discard 15, etc. This should result in an array of 75x75 elements. How can this be done?
You can created a 1D mask, that carries out the keep/discard function, and then repeat the mask and apply the mask to the array. Here is an example.
import numpy as np
size = 300
array = np.arange(size).reshape((size, 1)) * np.arange(size).reshape((1, size))
mask = np.concatenate((np.ones(5), np.zeros(15))).astype(bool)
period = len(mask)
mask = np.repeat(mask.reshape((1, period)), repeats=size // period, axis=0)
mask = np.concatenate(mask, axis=0)
result = array[mask][:, mask]
print(result.shape)
You can view the array as series of 20x20 blocks, of which you want to keep the upper-left 5x5 portion. Let's say you have
keep = 5
discard = 15
This only works if
assert all(s % (keep + discard) == 0 for s in arr.shape)
First compute the shape of the view and use it:
block = keep + discard
shape1 = (arr.shape[0] // block, block, arr.shape[1] // block, block)
view = arr.reshape(shape1)[:, :keep, :, :keep]
The following operation will create a copy of the data because the view creates a non-contiguous buffer:
shape2 = (shape1[0] * keep, shape1[2] * keep)
result = view.reshape(shape2)
You can compute shape1 and shape2 in a more general manner with something like
shape1 = tuple(
np.stack((np.array(arr.shape) // block,
np.full(arr.ndim, block)), -1).ravel())
shape2 = tuple(np.array(shape1[::2]) * keep)
I would recommend packaging this into a function.
Here is my first thought of a solution. Will update later if I think of one with fewer lines. This should work even if the input is not square:
output = []
for i in range(len(arr)):
tmp = []
if i % (15+5) < 5: # keep first 5, then discard next 15
for j in range(len(arr[i])):
if j % (15+5) < 5: # keep first 5, then discard next 15
tmp.append(arr[i,j])
output.append(tmp)
Update:
Building off of Yang's answer, here is another way which uses np.tile, which repeats an array a given number of times along each axis. This relies on the input array being square in dimension.
import numpy as np
# Define one instance of the keep/discard box
keep, discard = 5, 15
mask = np.concatenate([np.ones(keep), np.zeros(discard)])
mask_2d = mask.reshape((keep+discard,1)) * mask.reshape((1,keep+discard))
# Tile it out -- overshoot, then trim to match size
count = len(arr)//len(mask_2d) + 1
tiled = np.tile(mask_2d, [count,count]).astype('bool')
tiled = tiled[:len(arr), :len(arr)]
# Apply the mask to the input array
dim = sum(tiled[0])
output = arr[tiled].reshape((dim,dim))
Another option using meshgrid and a modulo:
# MyArray = 300x300 numpy array
r = np.r_[0:300] # A slide from 0->300
xv, yv = np.meshgrid(r, r) # x and y grid
mask = ((xv%20)<5) & ((yv%20)<5) # We create the boolean mask
result = MyArray[mask].reshape((75,75)) # We apply the mask and reshape the final output

How to compute the center of mass of many images without a for loop?

I have 100 images of 10 x 10. I want to put them in a single array of shape 100 x 10 x 10 and then compute the center of mass of the 100 images in one go (without a loop for).
Currently, I am using the function center_of_mass from scipy as below:
import numpy as np
from scipy.ndimage.measurements import center_of_mass
# Example data
image = np.arange(100).reshape(10,10)
images = np.repeat([image],100, axis=0)
result = []
for i in range(images.shape[0]):
result.append( center_of_mass(images[i,:]) )
Is there a way to remove that for loop?
You can use the labels and index arguments to the center_of_mass function (one label per image). The downside is that the memory usage is roughly doubled.
labels = np.ones_like(images).cumsum(0)
result2 = [tup[1:] for tup in
center_of_mass(images, labels, index=np.arange(1, images.shape[0]+1))
]
assert result2 == result
Use reshape matrix and dot product.
By example:
import numpy as np
# Example data
image = np.arange(80).reshape(8,10)
images = np.repeat([image],90, axis=0)
images_row=images.reshape((90, 8*10))
S=np.sum(images_row, axis=1)
Y_mat,X_mat = np.meshgrid(np.arange(10),np.arange(8))
Y_mats = np.repeat([Y_mat],90, axis=0)
Y_mats = Y_mats.reshape((90, 8*10))
X_mats= np.repeat([X_mat],90, axis=0)
X_mats = X_mats.reshape((90, 8*10))
#center of mass:
X_c=np.dot(images_row, X_mats.T)/S
Y_c=np.dot(images_row, Y_mats.T)/S

How to vectorize a code with python numpy.bincount, using apply along axis

I'm trying to vectorize a code with numpy, to run it using multiprocessing, but i can't understand how numpy.apply_along_axis works. This is an example of the code, vectorized using map
import numpy
from scipy import sparse
import multiprocessing
from matplotlib import pyplot
#first i build a matrix of some x positions vs time datas in a sparse format
matrix = numpy.random.randint(2, size = 100).astype(float).reshape(10,10)
x = numpy.nonzero(matrix)[0]
times = numpy.nonzero(matrix)[1]
weights = numpy.random.rand(x.size)
#then i define an array of y positions
nStepsY = 5
y = numpy.arange(1,nStepsY+1)
#now i build an image using x-y-times coordinates and x-times weights
def mapIt(ithStep):
ncolumns = 80
image = numpy.zeros(ncolumns)
yTimed = y[ithStep]*times
positions = (numpy.round(x-yTimed)+50).astype(int)
values = numpy.bincount(positions,weights)
values = values[numpy.nonzero(values)]
positions = numpy.unique(positions)
image[positions] = values
return image
image = list(map(mapIt, range(nStepsY)))
image = numpy.array(image)
a = pyplot.imshow(image, aspect = 10)
Here the output plot
I tried to use numpy.apply_along_axis, but this function allows me to iterate only along the rows of image, while i need to iterate along the ithStep index too. E.g.:
#now i build an image using x-y-times coordinates and x-times weights
nrows = nStepsY
ncolumns = 80
matrix = numpy.zeros(nrows*ncolumns).reshape(nrows,ncolumns)
def applyIt(image):
image = numpy.zeros(ncolumns)
yTimed = y[ithStep]*times
positions = (numpy.round(x-yTimed)+50).astype(int)
values = numpy.bincount(positions,weights)
values = values[numpy.nonzero(values)]
positions = numpy.unique(positions)
image[positions] = values
return image
imageApplied = numpy.apply_along_axis(applyIt,1,matrix)
a = pyplot.imshow(imageApplied, aspect = 10)
It obviously return only the firs row nrows times, since nothing iterates ithStep:
And here the wrong plot
There is a way to iterate an index, or to use an index while numpy.apply_along_axis iterates?
Here the code with only matricial operations: it's quite faster than map or apply_along_axis but uses so much memory.
(in this function i use a trick with scipy.sparse, which works more intuitively than numpy arrays when you try to sum numbers on a same element)
def fullmatrix(nRows, nColumns):
y = numpy.arange(1,nStepsY+1)
image = numpy.zeros((nRows, nColumns))
yTimed = numpy.outer(y,times)
x3d = numpy.outer(numpy.ones(nStepsY),x)
weights3d = numpy.outer(numpy.ones(nStepsY),weights)
y3d = numpy.outer(y,numpy.ones(x.size))
positions = (numpy.round(x3d-yTimed)+50).astype(int)
matrix = sparse.coo_matrix((numpy.ravel(weights3d), (numpy.ravel(y3d), numpy.ravel(positions)))).todense()
return matrix
image = fullmatrix(nStepsY, 80)
a = pyplot.imshow(image, aspect = 10)
This way is simplier and very fast! Thank you so much.
nStepsY = 5
nRows = nStepsY
nColumns = 80
y = numpy.arange(1,nStepsY+1)
image = numpy.zeros((nRows, nColumns))
fakeRow = numpy.zeros(positions.size)
def itermatrix(ithStep):
yTimed = y[ithStep]*times
positions = (numpy.round(x-yTimed)+50).astype(int)
matrix = sparse.coo_matrix((weights, (fakeRow, positions))).todense()
matrix = numpy.ravel(matrix)
missColumns = (nColumns-matrix.size)
zeros = numpy.zeros(missColumns)
matrix = numpy.concatenate((matrix, zeros))
return matrix
for i in numpy.arange(nStepsY):
image[i] = itermatrix(i)
#or, without initialization of image:
imageMapped = list(map(itermatrix, range(nStepsY)))
imageMapped = numpy.array(imageMapped)
It feels like attempting to use map or apply_along_axis is obscuring the essentially iteration of the problem.
I rewrote your code as an explicit loop on y:
nStepsY = 5
y = numpy.arange(1,nStepsY+1)
image = numpy.zeros((nStepsY, 80))
for i, yi in enumerate(y):
yTimed = yi*times
positions = (numpy.round(x-yTimed)+50).astype(int)
values = numpy.bincount(positions,weights)
values = values[numpy.nonzero(values)]
positions = numpy.unique(positions)
image[i, positions] = values
a = pyplot.imshow(image, aspect = 10)
pyplot.show()
Looking at the code, I think I could calculate positions for all y values making a (y.shape[0],times.shape[0]) array. But the rest, the bincount and unique still have to work row by row.
apply_along_axis when working with a 2d array, and axis=1 essentially does:
res = np.zeros_like(arr)
for i in range....:
res[i,:] = func1d(arr[i,:])
If the input array has more dimensions it constructs a more elaborate indexing object [i,j,k,:]. And it can handle cases where func1d returns a different size array than the input. But in any case it is just a generalized iteration tool.
Moving the initial positions creation outside the loop:
yTimed = y[:,None]*times
positions = (numpy.round(x-yTimed)+50).astype(int)
image = numpy.zeros((positions.shape[0], 80))
for i, pos in enumerate(positions):
values = numpy.bincount(pos,weights)
values = values[numpy.nonzero(values)]
pos = numpy.unique(pos)
image[i, pos] = values
Now I can cast this as an apply_along_axis problem, with an applyIt that takes a positions vector (with all the yTimed information) rather than blank image vector.
def applyIt(pos, size, weights):
acolumn = numpy.zeros(size)
values = numpy.bincount(pos,weights)
values = values[numpy.nonzero(values)]
pos = numpy.unique(pos)
acolumn[pos] = values
return acolumn
image = numpy.apply_along_axis(applyIt, 1, positions, 80, weights)
Timing wise I expect it's a bit slower than my explicit iteration. It has to do more setup work, including a test call applyIt(positions[0,:],...) to determine the size of its return array (i.e image has different shape than positions.)
def csrmatrix(y, times, x, weights):
yTimed = numpy.outer(y,times)
n=y.shape[0]
x3d = numpy.outer(numpy.ones(n),x)
weights3d = numpy.outer(numpy.ones(n),weights)
y3d = numpy.outer(y,numpy.ones(x.size))
positions = (numpy.round(x3d-yTimed)+50).astype(int)
#print(y.shape, weights3d.shape, y3d.shape, positions.shape)
matrix = sparse.csr_matrix((numpy.ravel(weights3d), (numpy.ravel(y3d), numpy.ravel(positions))))
#print(repr(matrix))
return matrix
# one call
image = csrmatrix(y, times, x, weights)
# iterative call
alist = []
for yi in numpy.arange(1,nStepsY+1):
alist.append(csrmatrix(numpy.array([yi]), times, x, weights))
def mystack(alist):
# concatenate without offset
row, col, data = [],[],[]
for A in alist:
A = A.tocoo()
row.extend(A.row)
col.extend(A.col)
data.extend(A.data)
print(len(row),len(col),len(data))
return sparse.csr_matrix((data, (row, col)))
vimage = mystack(alist)

Categories