numpy - do operation along specified axis - python

So I want to implement a matrix standardisation method.
To do that, I've been told to
subtract the mean and divide by the standard deviation for each dimension
And to verify:
after this processing, each dimension has zero mean and unit variance.
That sounds simple enough ...
import numpy as np

def standardize(X: np.ndarray, inplace=True, verbose=False, check=False):
    ret = X
    if not inplace:
        ret = X.copy()
    ndim = np.ndim(X)
    for d in range(ndim):
        m = np.mean(ret, axis=d)
        s = np.std(ret, axis=d)
        if verbose:
            print(f"m{d} =", m)
            print(f"s{d} =", s)
        # TODO: handle zero s
        # TODO: subtract m along the correct axis
        # TODO: divide by s along the correct axis
    if check:
        means = [np.mean(ret, axis=d) for d in range(ndim)]
        stds = [np.std(ret, axis=d) for d in range(ndim)]
        if verbose:
            print("means=\n", means)
            print("stds=\n", stds)
        assert all(np.all(np.abs(mm) < 1e-15) for mm in means)
        assert all(np.all(np.abs(ss - 1.0) < 1e-12) for ss in stds)
    return ret
e.g. for ndim == 2, we could get something like
A=
[[ 0.40923704 0.91397416 0.62257397]
[ 0.15614258 0.56720836 0.80624135]]
m0 = [ 0.28268981 0.74059126 0.71440766] # can broadcast with ret -= m0
s0 = [ 0.12654723 0.1733829 0.09183369] # can broadcast with ret /= s0
m1 = [ 0.33333333 -0.33333333] # ???
s1 = [ 0.94280904 0.94280904] # ???
How do I do that?
Judging by "Broadcast an operation along specific axis in python", I thought I may be looking for a way to create
m[None, None, None, .., None, : , None, None, .., None]
Where there is exactly one : at index d.
But even if I knew how to do that, I'm not sure it'd work.
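For reference, such an index tuple can be built programmatically. Note, though, that m = np.mean(X, axis=d) has ndim-1 axes rather than one, so the direct equivalent of that pattern is np.expand_dims(m, axis=d), i.e. reinserting a single length-1 axis at position d. A minimal sketch:
# reinsert a length-1 axis at position d so the axis-d statistics
# broadcast against the original array; same as np.expand_dims(m, d)
idx = (slice(None),) * d + (None,)
m_b = m[idx]   # e.g. for ndim == 2, d == 1 this is m[:, None]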

You can swap your axes so that the axis you want to normalise comes first; the statistics computed over that leading axis then broadcast against the remaining axes. This also works in place, since swapaxes just returns a view on your data.
Using the numpy command swapaxes:
for d in range(ndim):
    ret = np.swapaxes(ret, 0, d)
    # compute the statistics over the (now leading) axis d;
    # their shape is then ret.shape[1:], which broadcasts
    # against ret for any ndim
    m = np.mean(ret, axis=0)
    s = np.std(ret, axis=0)
    # perform the normalisation of this axis
    ret -= m
    ret /= s
    ret = np.swapaxes(ret, 0, d)
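As an alternative (an editor's sketch, not part of the original answer), the keepdims argument lets the statistics broadcast directly, with no axis swapping, and also covers the "zero s" TODO:
for d in range(ndim):
    m = np.mean(ret, axis=d, keepdims=True)  # shape keeps a 1 on axis d
    s = np.std(ret, axis=d, keepdims=True)
    s[s == 0] = 1.0   # guard against constant slices (the "zero s" TODO)
    ret -= m          # broadcasts along axis d
    ret /= s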

Related

Efficient ways of calculating affinity only for local neighbourhood in a large graph

I am relatively new to Python and numpy, and I am trying to cluster a dense 256x256 matrix of floating point numbers using spectral clustering. Since the affinity matrix would be of size 65536x65536, a full affinity matrix cannot be computed (due to memory limitations). As such, I am currently calculating the affinity between a given matrix entry and its 5x5 local neighbourhood, and building a sparse graph (in 3-tuple representation).
To do so, I am using for loops (basically, a sliding window approach), which I think is not the most efficient way of doing this.
import numpy as np

def getAffinity(f1, f2):
    return np.exp(-np.linalg.norm(np.absolute(f1 - f2)) / 2.1)

G = np.arange(256*256).reshape((256, 256))

dim1 = 256  # Dimension 1 of matrix
dim2 = 256  # Dimension 2 of matrix
values = np.zeros(1623076, dtype=np.float32)  # To hold affinities
rows = np.zeros(1623076, dtype=np.int32)      # To hold row indices
cols = np.zeros(1623076, dtype=np.int32)      # To hold column indices
index = 0                                     # Running write position

for i in range(dim1):
    for j in range(dim2):
        current = G[i, j]
        for k in range(np.maximum(0, i-2), np.minimum(dim1, i+3)):      # traverse rows
            for l in range(np.maximum(0, j-2), np.minimum(dim2, j+3)):  # traverse columns
                rows[index] = i*dim1 + j
                cols[index] = k*dim1 + l
                values[index] = getAffinity(current, G[k, l])
                index += 1
I was wondering whether there are any other efficient ways of achieving the same goal.
Here is a sparse matrix approach. It is >800x faster than the loopy code.
import numpy as np
from scipy import sparse
from time import perf_counter as pc

T = []
T.append(pc())

def getAffinity(f1, f2):
    return np.exp(-np.linalg.norm(np.absolute(f1 - f2)) / 2.1)

G = 2*np.arange(256*256).reshape((256, 256))

dim1 = 256  # Dimension 1 of matrix
dim2 = 256  # Dimension 2 of matrix
values = np.zeros(1623076, dtype=np.float32)  # To hold affinities
rows = np.zeros(1623076, dtype=np.int32)      # To hold row indices
cols = np.zeros(1623076, dtype=np.int32)      # To hold column indices
index = 0                                     # Running write position

for i in range(dim1):
    for j in range(dim2):
        current = G[i, j]
        for k in range(np.maximum(0, i-2), np.minimum(dim1, i+3)):      # traverse rows
            for l in range(np.maximum(0, j-2), np.minimum(dim2, j+3)):  # traverse columns
                rows[index] = i*dim1 + j
                cols[index] = k*dim1 + l
                values[index] = getAffinity(current, G[k, l])
                index += 1

T.append(pc())

affs_OP = sparse.coo_matrix((values, (rows, cols))).tocsr()

import scipy.sparse as sp

def getAffinity(f1, f2):  # similar to @PaulPanzer, I don't think OP's norm is right
    return np.exp(-np.abs(f1 - f2) / 2.1)

def affinity_block(dim=256, dist=2):
    i = np.arange(-dist, dist+1)
    init_block = sp.dia_matrix((np.ones((i.size, dim)), i), (dim, dim))
    out = sp.kron(init_block, init_block).tocoo()
    out.data = getAffinity(Gf[out.row], Gf[out.col])
    return out

T.append(pc())

Gf = G.ravel()
offsets = np.concatenate((np.mgrid[1:3, -2:3].reshape(2, -1).T,
                          np.mgrid[:1, 1:3].reshape(2, -1).T), axis=0)

def make_diag(yo, xo):
    o = 256*yo + xo
    diag = np.exp(-np.abs(Gf[o:] - Gf[:-o]) / 2.1)
    if xo > 0:
        diag[:xo-256].reshape(-1, 256)[:, -xo:] = 0
    elif xo < 0:
        diag[:xo].reshape(-1, 256)[:, :-xo] = 0
        diag[xo:] = 0
    return diag

diags = [make_diag(*o) for o in offsets]
offsets = np.sum(offsets * [256, 1], axis=1)

affs_pp = sparse.diags([*diags, [np.ones(256*256)], *diags],
                       np.concatenate([offsets, [0], -offsets]))

T.append(pc())

affs_df = affinity_block()

T.append(pc())

print("OP: {:.3f} s convert OP to sparse matrix: {:.3f} s pp {:.3f} s df: {:.3f} s".format(*np.diff(T)))

diff = affs_pp - affs_OP
diff *= diff.sign()
md = diff.max()
print(f"max deviation pp-OP: {md}")
print(f"number of different entries pp-df: {(affs_pp - affs_df).nnz}")
Sample run:
OP: 23.392 s convert OP to sparse matrix: 0.020 s pp 0.025 s df: 0.093 s
max deviation pp-OP: 2.0616356788405454e-08
number of different entries pp-df: 0
A bit of explanation, 1D first to keep it simple. Let's imagine an actual sliding window, so we can use time as an intuitive axis:

        space
   +-------------->
   |
 t | xo...      x: window center
 i | oxo..      o: window, off center
 m | .oxo.      .: not in window
 e | ..oxo
   | ...ox
   v

Time here is effectively equivalent to space, because we move at constant speed. We can now see that all the window points can be described by three diagonals, with offsets 0, 1 and -1. But since the affinities are symmetric and the offset-0 diagonal is trivial, we only need to calculate the offset-1 diagonal.
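As a concrete sketch of this 1D case (editor's illustration, for a signal g of length n and window radius 1):
import numpy as np
from scipy import sparse

n = 8
g = np.random.rand(n)

# the single off-center diagonal: entry k is the affinity of g[k] and g[k+1]
d1 = np.exp(-np.abs(g[1:] - g[:-1]) / 2.1)
d0 = np.ones(n)   # each point's affinity with itself is exp(0) = 1

# symmetric tridiagonal affinity matrix assembled from the three diagonals
A = sparse.diags([d1, d0, d1], [-1, 0, 1])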
Now let's move on to 2D. The smallest example we can do is a 3x3 window in a 4x4 array. In row-major order this looks like:
xo..oo..........
oxo.ooo.........
.oxo.ooo........
..ox..oo........
oo..xo..oo......
ooo.oxo.ooo.....
.ooo.oxo.ooo....
..oo..ox..oo....
....oo..xo..oo..
....ooo.oxo.ooo.
.....ooo.oxo.ooo
......oo..ox..oo
........oo..xo..
........ooo.oxo.
.........ooo.oxo
..........oo..ox
The relevant offsets are (0,1), (1,-1), (1,0) and (1,1), or in row-major terms 0x4+1 = 1, 1x4-1 = 3, 1x4+0 = 4 and 1x4+1 = 5. Also note that most of these diagonals are not complete; the missing bits are explained by row-major wrap-around: at z = (y, x) with x = 3, the flat neighbour z+1 is not actually the right neighbour (y, x+1) but, because of the line jump, (y+1, 0). The if-else clause in the code above blanks those wrapped-around bits of each diagonal.
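As a sketch (editor's illustration), the 16x16 membership pattern above can be reproduced directly as the Kronecker product of two tridiagonal indicator matrices, which is also the structure the block approach below exploits:
import numpy as np
import scipy.sparse as sp

dim, dist = 4, 1   # 4x4 grid, 3x3 window
i = np.arange(-dist, dist + 1)
band = sp.dia_matrix((np.ones((i.size, dim)), i), (dim, dim))   # tridiagonal ones
mask = sp.kron(band, band).toarray().astype(int)   # 16x16 window-membership pattern
print(mask)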
@DanielF's strategy is similar, but takes advantage of the block structure evident in the figure:
xo.. oo.. .... ....
oxo. ooo. .... ....
.oxo .ooo .... ....
..ox ..oo .... ....
oo.. xo.. oo.. ....
ooo. oxo. ooo. ....
.ooo .oxo .ooo ....
..oo ..ox ..oo ....
.... oo.. xo.. oo..
.... ooo. oxo. ooo.
.... .ooo .oxo .ooo
.... ..oo ..ox ..oo
.... .... oo.. xo..
.... .... ooo. oxo.
.... .... .ooo .oxo
.... .... ..oo ..ox
This seems to be a somewhat more elegant and extensible, albeit slightly (4x) slower, way to do the same thing as @PaulPanzer:
import numpy as np
import scipy.sparse as sp
from functools import reduce

def getAffinity(f1, f2):  # similar to @PaulPanzer, I don't think OP's norm is right
    return np.exp(-np.abs(f1 - f2) / 2.1)

def affinity_block(G, dist=2):
    Gf = G.ravel()
    i = np.arange(-dist, dist + 1)
    init_blocks = [1]
    for dim in G.shape:
        init_blocks.append(sp.dia_matrix((np.ones((i.size, dim)), i), (dim, dim)))
    out = reduce(sp.kron, init_blocks).tocoo()
    out.data = getAffinity(Gf[out.row], Gf[out.col])
    return out
This allows non-square G matrices, and higher dimensions.
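A usage sketch (editor's illustration; the shape is hypothetical):
G = np.random.rand(64, 100)        # non-square grid
A = affinity_block(G, dist=2)      # sparse (6400 x 6400) affinity matrix
print(A.shape, A.nnz)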

Removing axis argument from numpy argmin, but still vectorized

So I have the following line of code:
np.argmin(distances, axis=0)
Here distances is a distance matrix between k centroids and n points, so it's a k x n matrix.
With this line of code I'm trying to find the closest centroid for each point by taking the argmin along axis 0.
My goal is to have similar vectorized code without the axis argument, as it is not implemented in the fork of numpy I'm using.
Any help would be nice :)
Here's a vectorized one -
def partial_argsort(a):
    idar = np.zeros(a.max()+1, dtype=int)
    idar[a] = np.arange(len(a))
    return idar[np.sort(a)]

def argmin_0(a):
    # Define a scaling array to scale each col such that each col is
    # offset against its previous one
    s = (a.max()+1) * np.arange(a.shape[1])

    # Scale each col, flatten with col-major order. Find global partial-argsort.
    # With the offsetting, those argsort indices are limited to per-col ranges.
    # Subtract each group of ncols elements based on the offsetting.
    m, n = a.shape
    a1D = (a + s).T.ravel()
    return partial_argsort(a1D)[::m] - m*np.arange(n)
Sample run for verification -
In [442]: np.random.seed(0)
...: a = np.random.randint(11,9999,(1000,1000))
...: idx0 = argmin_0(a)
...: idx1 = a.argmin(0)
...: r = np.arange(len(idx0))
...: print((a[idx0,r] == a[idx1,r]).all())
True
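One caveat worth noting (editor's addition): partial_argsort scatters positions into an array indexed by value, so the input is assumed to contain non-negative integers, and for tied column minima the returned index may differ from argmin's first-occurrence choice while still pointing at an equal value. A tiny check under those assumptions:
b = np.array([[3, 7],
              [1, 9],
              [5, 2]])
print(argmin_0(b))   # -> [1 2], same as b.argmin(axis=0)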

How to collapse two array axis together of a numpy array?

Basic idea: I have an array of images of shape images.shape # (10, 28, 28, 3). So, 10 images of 28x28 pixels with 3 colour channels. I want to stitch them together into one long row: single_image.shape # (280, 28, 3). What would be the best numpy-based function for that?
More generally: is there a function along the lines of stitch(array, source_axis=0, target_axis=1) that would transform an array of shape A.shape # [a0, a1, source_axis, a4, target_axis, a6] into the shape B.shape # [a0, a1, a4, target_axis*source_axis, a6] by concatenating the subarrays A[:,:,i,:,:,:] along axis=target_axis?
You can set it up with a single moveaxis + reshape combo -
def merge_axis(a, source_axis=0, target_axis=1):
    shp = a.shape
    L = shp[source_axis] * shp[target_axis]   # merged axis length
    out_shp = np.insert(np.delete(shp, (source_axis, target_axis)), target_axis-1, L)
    return np.moveaxis(a, source_axis, target_axis-1).reshape(out_shp)
Alternatively, out_shp could be set up with array manipulations, which might be easier to follow, like so -
shp = np.array(a.shape)
shp[target_axis] *= shp[source_axis]
out_shp = np.delete(shp, source_axis)
If the source and target axes are adjacent, we can skip the moveaxis and simply reshape; the additional benefit is that the output is then a view into the input, and hence virtually free at runtime. So, we introduce an if-conditional to check for that case and modify our implementations to something like these -
def merge_axis_v1(a, source_axis=0, target_axis=1):
    shp = a.shape
    L = shp[source_axis] * shp[target_axis]   # merged axis length
    out_shp = np.insert(np.delete(shp, (source_axis, target_axis)), target_axis-1, L)
    if target_axis == source_axis + 1:
        return a.reshape(out_shp)
    else:
        return np.moveaxis(a, source_axis, target_axis-1).reshape(out_shp)

def merge_axis_v2(a, source_axis=0, target_axis=1):
    shp = np.array(a.shape)
    shp[target_axis] *= shp[source_axis]
    out_shp = np.delete(shp, source_axis)
    if target_axis == source_axis + 1:
        return a.reshape(out_shp)
    else:
        return np.moveaxis(a, source_axis, target_axis-1).reshape(out_shp)
Verify views -
In [156]: a = np.random.rand(10,10,10,10,10)
In [157]: np.shares_memory(merge_axis_v1(a, source_axis=0, target_axis=1),a)
Out[157]: True
Here is my take:
def merge_axis(array, source_axis=0, target_axis=1):
    array = np.moveaxis(array, source_axis, 0)
    array = np.moveaxis(array, target_axis, 1)
    array = np.concatenate(array)
    array = np.moveaxis(array, 0, target_axis-1)
    return array
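A usage sketch for the image-stitching case from the question (shapes assumed as stated there):
images = np.random.rand(10, 28, 28, 3)   # 10 RGB images of 28x28 pixels
single_image = merge_axis(images, source_axis=0, target_axis=1)
print(single_image.shape)                # -> (280, 28, 3)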

Python: binned_statistic_2d mean calculation ignoring NaNs in data

I am using scipy.stats.binned_statistic_2d to bin irregular data onto a uniform grid by finding the mean of points within every bin.
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt

x, y = np.meshgrid(np.sort(np.random.uniform(0, 1, 100)),
                   np.sort(np.random.uniform(0, 1, 100)))
z = np.sin(x*y)
statistic, xedges, yedges, binnumber = scipy.stats.binned_statistic_2d(
    x.ravel(), y.ravel(), values=z.ravel(), statistic='mean',
    bins=[np.arange(0, 1.1, .1), np.arange(0, 1.1, .1)])

plt.figure(1)
plt.pcolormesh(x, y, z, vmin=0, vmax=1)
plt.figure(2)
plt.pcolormesh(xedges, yedges, statistic, vmin=0, vmax=1)
Produces these plots, as expected:
Scattered data:
Gridded data:
But the data I want to grid has NaNs in it. This is what the result is like when I add NaNs:
x, y = np.meshgrid(np.sort(np.random.uniform(0, 1, 100)),
                   np.sort(np.random.uniform(0, 1, 100)))
z = np.sin(x*y)
z[50:55, 50:55] = np.nan
statistic, xedges, yedges, binnumber = scipy.stats.binned_statistic_2d(
    x.ravel(), y.ravel(), values=z.ravel(), statistic='mean',
    bins=[np.arange(0, 1.1, .1), np.arange(0, 1.1, .1)])

plt.figure(3)
plt.pcolormesh(x, y, z, vmin=0, vmax=1)
plt.figure(4)
plt.pcolormesh(xedges, yedges, statistic, vmin=0, vmax=1)
Scattered:
Gridded:
Obviously, if a bin is entirely filled with NaNs, the resulting mean of that bin should still be NaN. However, I would like bins that are not entirely filled with NaNs to yield the mean of their non-NaN values.
I've tried replacing the statistic argument of sp.stats.binned_statistic_2d with np.nanmean. This works, but it becomes very slow on large datasets. I've also tried digging into the underlying code of sp.stats.binned_statistic_2d, but I can't figure out exactly how it calculates the mean, or how to make it ignore NaNs in its calculation.
Any ideas?
I had the same problem, so I changed the definition of binned_statistic_dd in scipy.stats and saved a local copy so that it won't be overwritten when scipy is updated.
I added 'nanmean' to the list of known_stats and the following branch:
elif statistic == 'nanmean':
    result.fill(np.nan)
    for i in np.unique(binnumbers):
        for vv in builtins.range(Vdim):
            result[vv, i] = np.nanmean(values[vv, binnumbers == i])
Full new definition:
def binned_statistic_dd(sample, values, statistic='mean',
                        bins=10, range=None, expand_binnumbers=False,
                        binned_statistic_result=None):
    """
    Compute a multidimensional binned statistic for a set of data.

    This is a generalization of a histogramdd function. A histogram divides
    the space into bins, and returns the count of the number of points in
    each bin. This function allows the computation of the sum, mean, median,
    or other statistic of the values within each bin.

    Parameters
    ----------
    sample : array_like
        Data to histogram passed as a sequence of N arrays of length D, or
        as an (N,D) array.
    values : (N,) array_like or list of (N,) array_like
        The data on which the statistic will be computed. This must be
        the same shape as `sample`, or a list of sequences - each with the
        same shape as `sample`. If `values` is such a list, the statistic
        will be computed on each independently.
    statistic : string or callable, optional
        The statistic to compute (default is 'mean').
        The following statistics are available:

          * 'mean' : compute the mean of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'median' : compute the median of values for points within each
            bin. Empty bins will be represented by NaN.
          * 'count' : compute the count of points within each bin. This is
            identical to an unweighted histogram. `values` array is not
            referenced.
          * 'sum' : compute the sum of values for points within each bin.
            This is identical to a weighted histogram.
          * 'std' : compute the standard deviation within each bin. This
            is implicitly calculated with ddof=0. If the number of values
            within a given bin is 0 or 1, the computed standard deviation
            value will be 0 for the bin.
          * 'min' : compute the minimum of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'max' : compute the maximum of values for point within each bin.
            Empty bins will be represented by NaN.
          * function : a user-defined function which takes a 1D array of
            values, and outputs a single numerical statistic. This function
            will be called on the values in each bin. Empty bins will be
            represented by function([]), or NaN if this returns an error.

    bins : sequence or positive int, optional
        The bin specification must be in one of the following forms:

          * A sequence of arrays describing the bin edges along each
            dimension.
          * The number of bins for each dimension (nx, ny, ... = bins).
          * The number of bins for all dimensions (nx = ny = ... = bins).
    range : sequence, optional
        A sequence of lower and upper bin edges to be used if the edges are
        not given explicitly in `bins`. Defaults to the minimum and maximum
        values along each dimension.
    expand_binnumbers : bool, optional
        'False' (default): the returned `binnumber` is a shape (N,) array of
        linearized bin indices.
        'True': the returned `binnumber` is 'unraveled' into a shape (D,N)
        ndarray, where each row gives the bin numbers in the corresponding
        dimension.
        See the `binnumber` returned value, and the `Examples` section of
        `binned_statistic_2d`.
    binned_statistic_result : binnedStatisticddResult
        Result of a previous call to the function in order to reuse bin edges
        and bin numbers with new values and/or a different statistic.
        To reuse bin numbers, `expand_binnumbers` must have been set to False
        (the default)

        .. versionadded:: 0.17.0

    Returns
    -------
    statistic : ndarray, shape(nx1, nx2, nx3,...)
        The values of the selected statistic in each two-dimensional bin.
    bin_edges : list of ndarrays
        A list of D arrays describing the (nxi + 1) bin edges for each
        dimension.
    binnumber : (N,) array of ints or (D,N) ndarray of ints
        This assigns to each element of `sample` an integer that represents
        the bin in which this observation falls. The representation depends
        on the `expand_binnumbers` argument. See `Notes` for details.

    See Also
    --------
    numpy.digitize, numpy.histogramdd, binned_statistic, binned_statistic_2d

    Notes
    -----
    Binedges:
    All but the last (righthand-most) bin is half-open in each dimension. In
    other words, if `bins` is ``[1, 2, 3, 4]``, then the first bin is
    ``[1, 2)`` (including 1, but excluding 2) and the second ``[2, 3)``. The
    last bin, however, is ``[3, 4]``, which *includes* 4.

    `binnumber`:
    This returned argument assigns to each element of `sample` an integer
    that represents the bin in which it belongs. The representation depends
    on the `expand_binnumbers` argument. If 'False' (default): The returned
    `binnumber` is a shape (N,) array of linearized indices mapping each
    element of `sample` to its corresponding bin (using row-major ordering).
    If 'True': The returned `binnumber` is a shape (D,N) ndarray where
    each row indicates bin placements for each dimension respectively. In
    each dimension, a binnumber of `i` means the corresponding value is
    between (bin_edges[D][i-1], bin_edges[D][i]), for each dimension 'D'.

    .. versionadded:: 0.11.0

    Examples
    --------
    >>> from scipy import stats
    >>> import matplotlib.pyplot as plt
    >>> from mpl_toolkits.mplot3d import Axes3D

    Take an array of 600 (x, y) coordinates as an example.
    `binned_statistic_dd` can handle arrays of higher dimension `D`. But a
    plot of dimension `D+1` is required.

    >>> mu = np.array([0., 1.])
    >>> sigma = np.array([[1., -0.5],[-0.5, 1.5]])
    >>> multinormal = stats.multivariate_normal(mu, sigma)
    >>> data = multinormal.rvs(size=600, random_state=235412)
    >>> data.shape
    (600, 2)

    Create bins and count how many arrays fall in each bin:

    >>> N = 60
    >>> x = np.linspace(-3, 3, N)
    >>> y = np.linspace(-3, 4, N)
    >>> ret = stats.binned_statistic_dd(data, np.arange(600), bins=[x, y],
    ...                                 statistic='count')
    >>> bincounts = ret.statistic

    Set the volume and the location of bars:

    >>> dx = x[1] - x[0]
    >>> dy = y[1] - y[0]
    >>> x, y = np.meshgrid(x[:-1]+dx/2, y[:-1]+dy/2)
    >>> z = 0

    >>> bincounts = bincounts.ravel()
    >>> x = x.ravel()
    >>> y = y.ravel()

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111, projection='3d')
    >>> with np.errstate(divide='ignore'):   # silence random axes3d warning
    ...     ax.bar3d(x, y, z, dx, dy, bincounts)

    Reuse bin numbers and bin edges with new values:

    >>> ret2 = stats.binned_statistic_dd(data, -np.arange(600),
    ...                                  binned_statistic_result=ret,
    ...                                  statistic='mean')
    """
    known_stats = ['mean', 'median', 'count', 'sum', 'std', 'min', 'max',
                   'nanmean']
    if not callable(statistic) and statistic not in known_stats:
        raise ValueError('invalid statistic %r' % (statistic,))

    try:
        bins = index(bins)
    except TypeError:
        # bins is not an integer
        pass
    # If bins was an integer-like object, now it is an actual Python int.

    # NOTE: for _bin_edges(), see e.g. gh-11365
    if isinstance(bins, int) and not np.isfinite(sample).all():
        raise ValueError('%r contains non-finite values.' % (sample,))

    # `Ndim` is the number of dimensions (e.g. `2` for `binned_statistic_2d`)
    # `Dlen` is the length of elements along each dimension.
    # This code is based on np.histogramdd
    try:
        # `sample` is an ND-array.
        Dlen, Ndim = sample.shape
    except (AttributeError, ValueError):
        # `sample` is a sequence of 1D arrays.
        sample = np.atleast_2d(sample).T
        Dlen, Ndim = sample.shape

    # Store initial shape of `values` to preserve it in the output
    values = np.asarray(values)
    input_shape = list(values.shape)
    # Make sure that `values` is 2D to iterate over rows
    values = np.atleast_2d(values)
    Vdim, Vlen = values.shape

    # Make sure `values` match `sample`
    if statistic != 'count' and Vlen != Dlen:
        raise AttributeError('The number of `values` elements must match the '
                             'length of each `sample` dimension.')

    try:
        M = len(bins)
        if M != Ndim:
            raise AttributeError('The dimension of bins must be equal '
                                 'to the dimension of the sample x.')
    except TypeError:
        bins = Ndim * [bins]

    if binned_statistic_result is None:
        nbin, edges, dedges = _bin_edges(sample, bins, range)
        binnumbers = _bin_numbers(sample, nbin, edges, dedges)
    else:
        edges = binned_statistic_result.bin_edges
        nbin = np.array([len(edges[i]) + 1 for i in builtins.range(Ndim)])
        # +1 for outlier bins
        dedges = [np.diff(edges[i]) for i in builtins.range(Ndim)]
        binnumbers = binned_statistic_result.binnumber

    result = np.empty([Vdim, nbin.prod()], float)

    if statistic == 'mean':
        result.fill(np.nan)
        flatcount = np.bincount(binnumbers, None)
        a = flatcount.nonzero()
        for vv in builtins.range(Vdim):
            flatsum = np.bincount(binnumbers, values[vv])
            result[vv, a] = flatsum[a] / flatcount[a]
    elif statistic == 'std':
        result.fill(0)
        flatcount = np.bincount(binnumbers, None)
        a = flatcount.nonzero()
        for vv in builtins.range(Vdim):
            for i in np.unique(binnumbers):
                # NOTE: take std dev by bin, np.std() is 2-pass and stable
                binned_data = values[vv, binnumbers == i]
                # calc std only when binned data is 2 or more for speed up.
                if len(binned_data) >= 2:
                    result[vv, i] = np.std(binned_data)
    elif statistic == 'count':
        result.fill(0)
        flatcount = np.bincount(binnumbers, None)
        a = np.arange(len(flatcount))
        result[:, a] = flatcount[np.newaxis, :]
    elif statistic == 'sum':
        result.fill(0)
        for vv in builtins.range(Vdim):
            flatsum = np.bincount(binnumbers, values[vv])
            a = np.arange(len(flatsum))
            result[vv, a] = flatsum
    elif statistic == 'median':
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = np.median(values[vv, binnumbers == i])
    elif statistic == 'min':
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = np.min(values[vv, binnumbers == i])
    elif statistic == 'max':
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = np.max(values[vv, binnumbers == i])
    elif statistic == 'nanmean':
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = np.nanmean(values[vv, binnumbers == i])
    elif callable(statistic):
        with np.errstate(invalid='ignore'), suppress_warnings() as sup:
            sup.filter(RuntimeWarning)
            try:
                null = statistic([])
            except Exception:
                null = np.nan
        result.fill(null)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = statistic(values[vv, binnumbers == i])

    # Shape into a proper matrix
    result = result.reshape(np.append(Vdim, nbin))

    # Remove outliers (indices 0 and -1 for each bin-dimension).
    core = tuple([slice(None)] + Ndim * [slice(1, -1)])
    result = result[core]

    # Unravel binnumbers into an ndarray, each row the bins for each dimension
    if expand_binnumbers and Ndim > 1:
        binnumbers = np.asarray(np.unravel_index(binnumbers, nbin))

    if np.any(result.shape[1:] != nbin - 2):
        raise RuntimeError('Internal Shape Error')

    # Reshape to have output (`result`) match input (`values`) shape
    result = result.reshape(input_shape[:-1] + list(nbin-2))

    return BinnedStatisticddResult(result, edges, binnumbers)
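If patching scipy is not an option, another fast route (an editor's suggestion, not part of the original answer) is to drop the NaN points before binning: bins whose points were all NaN then come out empty, and the built-in 'mean' statistic already reports empty bins as NaN, while partially-NaN bins get the mean of their remaining values:
ok = ~np.isnan(z.ravel())
statistic, xedges, yedges, binnumber = scipy.stats.binned_statistic_2d(
    x.ravel()[ok], y.ravel()[ok], values=z.ravel()[ok], statistic='mean',
    bins=[np.arange(0, 1.1, .1), np.arange(0, 1.1, .1)])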

Full Frequency Array Reconstruction after numpy.fft.rfftn

I have a real-valued grayscale 3D image with resolution rows x cols x deps. I take the DFT of the image using freq = numpy.fft.rfftn(myImage)
The returned array, freq, has shape rows x cols x (deps/2 + 1). I want to reconstruct freq as if it were the output of numpy.fft.fftn(myImage), that is, I want the dimensions of freq to be rows x cols x deps.
I know that the correspondence for the real-valued DFT is X_(k1,k2,k3) = X*_(N1-k1, N2-k2, N3-k3) (indices taken mod Ni), where * denotes complex conjugation.
I could reconstruct the full freq array using a loop, but that would be too slow; I'm having trouble figuring out the correct way to do it with array slicing.
Thanks!
FYI, I need the full array because I'll be element-wise multiplying it with another array of full size rows x cols x deps; I cannot assume that array has any structure (like symmetry) that would make reconstructing the full freq array unnecessary.
I got it!
import numpy as np
import time

rows = 181
cols = 217
deps = 181

jac_k = np.random.rand(rows, cols, deps)*5

prev = time.time()
fft1 = np.fft.fftn(jac_k)
print(time.time() - prev)

prev = time.time()
fft2 = np.fft.rfftn(jac_k)
# mirror the half of the last axis that rfftn omits ...
if deps % 2 == 0:
    fft2Star = np.conj(fft2[:, :, -2:0:-1])   # skip the Nyquist and DC planes
else:
    fft2Star = np.conj(fft2[:, :, -1:0:-1])   # skip only the DC plane
# ... and negate (mod N) the frequencies of the other two axes:
# index 0 stays put, indices 1..N-1 are reversed
fft2Star[1:, :, :] = fft2Star[:0:-1, :, :]
fft2Star[:, 1:, :] = fft2Star[:, :0:-1, :]
fft2 = np.concatenate((fft2, fft2Star), axis=2)
print(time.time() - prev)

print(np.linalg.norm(fft1 - fft2))
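For reference, the same reconstruction can also be written without in-place reversals by indexing with negated (mod N) frequencies; a sketch (editor's suggestion, valid for even and odd deps):
fft2_half = np.fft.rfftn(jac_k)
k = np.arange(deps//2 + 1, deps)    # frequencies missing from the rfftn output
i = (-np.arange(rows)) % rows       # negated row frequencies, mod rows
j = (-np.arange(cols)) % cols       # negated column frequencies, mod cols
# X[k1, k2, k3] = conj(X[-k1 mod N1, -k2 mod N2, -k3 mod N3])
tail = np.conj(fft2_half[np.ix_(i, j, deps - k)])
fft_full = np.concatenate((fft2_half, tail), axis=2)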
