Inaccuracy when calculating the length of a multi-section Bezier curve (generated using linear interpolation) - Python

For a Bezier slider (from osu!), the calculated length (variable "bar") is far higher than the expected length (taken from osu!).
Raw data (index 5 holds the control points, index 7 the expected length):
raw_data = 64,64,336,6,0,B|352:64|352:64|64:160|64:160|352:160|352:160|64:256|64:256|352:256|352:256|448:256|448:96,1,1679.9999554952
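For reference, a minimal sketch of how that line could be split into the control points and the expected length (the field layout is taken from the description above; variable names are just illustrative):
raw = "64,64,336,6,0,B|352:64|352:64|64:160|64:160|352:160|352:160|64:256|64:256|352:256|352:256|448:256|448:96,1,1679.9999554952"
fields = raw.split(',')
# the slider head (fields 0 and 1) is the first control point,
# the remaining control points come from the pipe-separated pairs in field 5
head = (int(fields[0]), int(fields[1]))
curve = [tuple(map(int, p.split(':'))) for p in fields[5].split('|')[1:]]
control_points = [head] + curve      # [(64, 64), (352, 64), (352, 64), (64, 160), ...]
expected_length = float(fields[7])   # 1679.9999554952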
Bezier control points:
import math
import matplotlib.pyplot as plt
debug = True
x = [64, 352, 352, 64, 64, 352, 352, 64, 64, 352, 352, 448, 448] #control points (x)
y = [64, 64, 64, 160, 160, 160, 160, 256, 256, 256, 256, 256, 96] #control points (y)
bezier_res = 10 #currently 10, aiming for at least 200 points per curve section
Function that generates points & length between each point:
def bezier(x,y):
    #separate control points into each curve
    #when two control points have the same coordinate that means the next control points will be for a separate curve
    pc = 0
    px,py = [],[]
    sx,sy,sd = [],[],[]
    for o in range(1,len(x)):
        if x[o] == x[o-1] and y[o] == y[o-1]:
            px.append(x[pc:o])
            py.append(y[pc:o])
            pc = o
    px.append(x[pc:])
    py.append(y[pc:])
    #produce points on each curve
    for q in range(len(px)):
        #get control points
        qx = px[q]
        qy = py[q]
        #extend on last curve (removed for now)
        if q < len(px)-1:
            loop = bezier_res+1
        else:
            #loop = int(bezier_res + bezier_res / 2)
            loop = bezier_res+1 #temporary
        #linear interpolation for bezier points
        for w in range(loop):
            #scale multiplier
            ws = w/bezier_res
            #copy of control points
            wx = qx
            wy = qy
            wd = []
            #loop until 1 point left
            for e in range(1,len(qx)):
                #create blank lists
                ex = []
                ey = []
                ed = []
                #loop to create a point for every pair
                for r in range(1,len(wx)):
                    """Estimated section containing inaccuracy"""
                    #get displacement between control points
                    rx = (wx[r] - wx[r-1]) * ws
                    ry = (wy[r] - wy[r-1]) * ws
                    #get distance
                    rd = math.sqrt(rx**2 + ry**2)
                    #convert displacement to coordinates
                    rx += wx[r-1]
                    ry += wy[r-1]
                    #add to list
                    ex.append(rx)
                    ey.append(ry)
                    ed.append(rd)
                    """Estimated end of section containing inaccuracy"""
                #replace control points
                #-> next loop will reduce it to less points
                wx = ex
                wy = ey
                wd = ed
            #add to output
            sx.append(wx[0])
            sy.append(wy[0])
            sd.append(wd[0])
    return sx,sy,sd
Code that adds up the lengths between the points:
sx,sy,sd = bezier(x,y)
"""Potential other source of inaccuracy"""
bar = 0
for foo in range(len(sd)):
    bar += sd[foo]
"""End of potential source of inaccuracy"""
if debug:
    plt.scatter(x,y,s=40,c='r')
    plt.scatter(sx,sy,s=5,c='b')
    plt.ylim(384,0)
    plt.xlim(0,512)
    plt.show()
For this data, these are the results I got:
calculated length w/ 10 points per curve: 8759.076582421749
calculated length w/ 200 points per curve: 159648.27500500486
expected length: ~1680 (from osu, assumed to be accurate)
I expect the calculated length to be slightly less than the expected length.
My assumption is that the inaccuracy is caused by the use of floats and square roots.
I am aiming to use 200 points per curve (variable "bezier_res").
How can I modify it so that the calculated length is close to the expected length (±500)?
I also need to be able to get a coordinate at any specified length (within ±5), including extending beyond the final control point.
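For context, a minimal sketch of how arc length is usually accumulated for a single curve section: sample the curve at successive t values and sum the straight-line distances between consecutive sampled points (the helper names below are illustrative, not from the code above). A cumulative table of these per-segment distances can then be searched and interpolated to return a coordinate at any requested length.
import math

def bezier_point(cx, cy, t):
    # de Casteljau reduction of one curve section down to a single point at parameter t
    wx, wy = list(cx), list(cy)
    while len(wx) > 1:
        wx = [wx[i] + (wx[i+1] - wx[i]) * t for i in range(len(wx) - 1)]
        wy = [wy[i] + (wy[i+1] - wy[i]) * t for i in range(len(wy) - 1)]
    return wx[0], wy[0]

def section_length(cx, cy, res=200):
    # sum the distances between consecutive sampled curve points
    total = 0.0
    px, py = bezier_point(cx, cy, 0.0)
    for i in range(1, res + 1):
        qx, qy = bezier_point(cx, cy, i / res)
        total += math.hypot(qx - px, qy - py)
        px, py = qx, qy
    return total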

Related

How to generate random geocoordinates using random module

How to generate random latitude and longitude using the Python 3 random module? I have already googled and read the documentation, and have not found a way to do this.
The problem when using uniform distributions for both the latitude and the longitude
is that physically, the latitude is NOT uniformly distributed.
So if you plan to use these random points for something like a statistical averaging computation,
or a physics Monte-Carlo simulation, the results risk being incorrect.
And if you plot a graphical representation of the “uniform” random points, they will seem to cluster in the polar regions.
To picture that, consider on planet Earth the zone that lies between 89 and 90 degrees of latitude (North).
The length of a degree of latitude is 10,000/90 ≈ 111 km. That zone is a circle of radius 111 km,
centered around the North Pole. Its area is about 3.14 * 111 * 111 ≈ 39,000 km².
On the other hand, consider the zone that lies between 0 and 1 degree of latitude.
This is a strip whose length is 40,000 km (the Equator) and whose width is 111 km,
so its area is about 4.44 million km², much larger than the polar zone.
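A quick numerical illustration of that clustering (this check is not part of the original answer; the numbers are approximate):
import math
import random

N = 1000 * 1000
# naive uniform latitudes: fraction of samples above 80 degrees North
naive = sum(1 for _ in range(N) if random.uniform(-90.0, 90.0) > 80.0) / float(N)
# true area fraction of the sphere above 80 degrees: (1 - sin(80 deg)) / 2
true_frac = (1.0 - math.sin(math.radians(80.0))) / 2.0
print(naive)      # ~0.056
print(true_frac)  # ~0.0076, i.e. the naive sampler puts roughly 7x too many points there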
A simple algorithm:
A possibility is to use Gaussian-distributed random variables, as provided by the Python library.
If we build a 3D vector whose 3 components have Gaussian distributions, the overall
probability distribution is like
exp(-x²) * exp(-y²) * exp(-z²),
but this is the same thing as exp(-(x² + y² + z²)),
or exp(-r²), where r is the distance from the origin.
So these vectors have no privileged direction. Once normalized to unit length, they are uniformly
distributed on the unit sphere. They solve our problem with the latitude distribution.
The idea is implemented by the following Python code:
import math
import random
def randlatlon1():
    pi = math.pi
    cf = 180.0 / pi  # radians to degrees Correction Factor
    # get a random Gaussian 3D vector:
    gx = random.gauss(0.0, 1.0)
    gy = random.gauss(0.0, 1.0)
    gz = random.gauss(0.0, 1.0)
    # normalize to an equidistributed (x,y,z) point on the unit sphere:
    norm2 = gx*gx + gy*gy + gz*gz
    norm1 = 1.0 / math.sqrt(norm2)
    x = gx * norm1
    y = gy * norm1
    z = gz * norm1
    radLat = math.asin(z)     # latitude in radians
    radLon = math.atan2(y,x)  # longitude in radians
    return (round(cf*radLat, 5), round(cf*radLon, 5))
A sanity check:
Euclidean geometry provides a formula for the probability of a spherical zone
defined by minimal/maximal latitude and longitude. The corresponding Python code is like this:
def computeProbaG(minLat, maxLat, minLon, maxLon):
    pi = math.pi
    rcf = pi / 180.0  # degrees to radians Correction Factor
    lonProba = (maxLon - minLon) / 360.0
    minLatR = rcf * minLat
    maxLatR = rcf * maxLat
    latProba = (1.0/2.0) * (math.sin(maxLatR) - math.sin(minLatR))
    return (lonProba * latProba)
And we can also compute an approximation of that same probability by random sampling, using
the random points provided by a function such as randlatlon1, and counting what
percentage of them happen to fall within the selected zone:
def computeProbaR(randlatlon, ranCount, minLat, maxLat, minLon, maxLon):
    norm = 1.0 / ranCount
    pairs = [randlatlon() for i in range(ranCount)]
    acceptor = lambda p: ( (p[0] > minLat) and (p[0] < maxLat) and
                           (p[1] > minLon) and (p[1] < maxLon) )
    selCount = sum(1 for p in filter(acceptor, pairs))
    return (norm * selCount)
Equipped with these two functions, we can check for various geometric parameter sets
that the geometric and probabilistic results are in good agreement, with ranCount set to one million random points:
ranCount = 1000*1000
print (" ")
probaG1 = computeProbaG( 30, 60, 45, 90)
probaR1 = computeProbaR(randlatlon1, ranCount, 30, 60, 45, 90)
print ("probaG1 = %f" % probaG1)
print ("probaR1 = %f" % probaR1)
print (" ")
probaG2 = computeProbaG( 10, 55, -40, 160)
probaR2 = computeProbaR(randlatlon1, ranCount, 10, 55, -40, 160)
print ("probaG2 = %f" % probaG2)
print ("probaR2 = %f" % probaR2)
print (" ")
Execution output:
$ python3 georandom.py
probaG1 = 0.022877
probaR1 = 0.022852
probaG2 = 0.179307
probaR2 = 0.179644
$
So the two sorts of numbers appear to agree reasonably well here.
Addendum:
For the sake of completeness, we can add a second algorithm which is less intuitive but derives from a wider statistical principle.
To solve the problem of the latitude distribution, we can use the Inverse Transform Sampling theorem. In order to do so, we need some formula for the probability of the latitude to be less than an arbitrary prescribed value, φ.
The region of the unit 3D sphere whose latitude is less than a given φ is known as a spherical cap. Its area can be obtained through elementary calculus, as described here for example.
The spherical cap area is given by formula: A = 2π * (1 + sin(φ))
The corresponding probability can be obtained by dividing this area by the overall area of the unit 3D sphere, that is 4π, corresponding to φ = φmax = π/2. Hence:
p = Proba{latitude < φ} = (1/2) * (1 + sin(φ))
Or, conversely:
φ = arcsin (2*p - 1)
From the Inverse Transform Sampling theorem, a fair sampling of the latitude (in radians) is obtained by replacing the probability p by a random variable uniformly distributed between 0 and 1. In Python, this gives:
lat = math.asin(2*random.uniform(0.0, 1.0) - 1.0)
As for the longitude, this is an independent random variable that is still uniformly distributed between -π and +π (in radians). So the overall Python sampler code is:
def randlatlon2r():
    pi = math.pi
    cf = 180.0 / pi  # radians to degrees Correction Factor
    u0 = random.uniform(0.0, 1.0)
    u1 = random.uniform(0.0, 1.0)
    radLat = math.asin(2*u0 - 1.0)  # angle with Equator - from +pi/2 to -pi/2
    radLon = (2*u1 - 1) * pi        # longitude in radians - from -pi to +pi
    return (round(radLat*cf,5), round(radLon*cf,5))
This code has been found to pass successfully the sanity check as described above.
Generate a random number between
Latitude: -85 to +85 (actually -85.05115 for some reason)
Longitude: -180 to +180
As @tandem wrote in his answer, the range for latitude is almost -90 to +90 (it is cut off on maps) and for longitude it is -180 to +180. To generate random float numbers in this range, use the random.uniform function:
import random
# returns (lat, lon)
def randlatlon():
    return (round(random.uniform( -90, 90), 5),
            round(random.uniform(-180, 180), 5))
It is rounded to 5 digits after the decimal point because extra accuracy is unnecessary.

Fitting an Orthogonal Grid to Noisy Coordinates

Problem
I have a list of coordinates that are meant to form a grid. Each coordinate has a random error component and some of the coordinates are missing. The grid could be rotated (update). I want to fit an orthogonal grid to the data points and return a list of the grid's vertices. For example:
Application
The purpose is to find a grid in a scanned image. The data points come from the results of contour or edge detection in OpenCV. An example is an image with a grid of photos.
Goal
I wrote some Python code that works, but would like to find a linear algebra algorithm using SciPy, statsmodels or other modules that would be more robust and handle a small rotation of the grid (less than 10°).
Python Code Using Lists Only
# Noisy [x, y] coordinates (origin is upper-left corner)
pts = [[103,101],
[198,103],
[300, 99],
[ 97,205],
[304,202],
[102,295],
[200,303],
[104,405],
[205,394],
[298,401]]
def row_col_avgs(num_list, ratio):
    # Finds the average of each row and column. Coordinates are
    # assigned to a row and column by specifying an error ratio.
    last_num, sum_nums, count_nums, avgs = 0, 0, 0, []
    num_list.sort()
    for num in num_list:
        # Calculate average for last row or column and begin new row or column
        if num > (1+ratio)*last_num and count_nums != 0:
            avgs.append(int(round(sum_nums/count_nums,0)))
            sum_nums = num
            count_nums = 1
        # Or continue with current row or column
        else:
            sum_nums += num
            count_nums += 1
        last_num = num
    avgs.append(int(round(sum_nums/count_nums,0)))
    return avgs
# Split coordinates into two lists of x's and y's
xs, ys = map(list, zip(*pts))
# Find averages of each row and column of the grid
x_avgs = row_col_avgs(xs, 0.1)
y_avgs = row_col_avgs(ys, 0.1)
# Return vertices of completed averaged grid
avg_grid = []
for y_avg in y_avgs:
    avg_row = []
    for x_avg in x_avgs:
        avg_row.append([int(x_avg), int(y_avg)])
    avg_grid.append(avg_row)
print(avg_grid)
Output
[[[102, 101], [201, 101], [301, 101]],
[[102, 204], [201, 204], [301, 204]],
[[102, 299], [201, 299], [301, 299]],
[[102, 400], [201, 400], [301, 400]]]
Parallel Slopes Ordinary Least Squares (OLS) Model:
y = mx + grp + b, where m = slope, b = y-intercept, and grp = categorical variable.
This is an alternative algorithm that can handle a rotated grid.
The OLS model includes both the data points in the original orientation
and a 90° rotation of the same data points. This is necessary so all gridlines are parallel and have the same slope.
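As a small, self-contained illustration of what such a parallel slopes model does (toy numbers, not from the question), the formula used further down yields one intercept per group plus a single shared slope:
import pandas as pd
from statsmodels.formula.api import ols

# two roughly parallel lines: y ~ 2x + 1 (group a) and y ~ 2x + 5 (group b)
toy = pd.DataFrame({
    'x':   [0, 1, 2, 3, 0, 1, 2, 3],
    'y':   [1.1, 2.9, 5.2, 7.0, 5.0, 7.1, 8.9, 11.1],
    'grp': ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'],
})
toy_model = ols("y ~ x + grp + 0", data=toy).fit()
print(toy_model.params)   # grp[a] and grp[b] intercepts plus one common slope for x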
Algorithm:
1. Find a reference gridline to compare with the remaining points by choosing two neighboring points in the first or last row with a slope closest to zero.
2. Calculate the distances between this reference line and the remaining points.
3. Segment the points into groups w.r.t. the calculated distances (one group per gridline).
4. Repeat steps 1 to 3 for the 90 degree rotated grid and combine the results.
5. Create a parallel slopes OLS model to determine linear equations for the gridlines.
6. Rotate the rotated gridlines back to their original orientation.
7. Calculate the intersection points.
Note: this fails if the noise, rotation angle and/or amount of missing data are too large.
Example: (figure omitted)
Python Code to Create Example
def create_random_example():
    # Requires import of numpy and random packages
    # Creates grid with random noise and missing points
    # Example will fail if std_dev, rotation, pct_removed too large
    # Parameters
    first_row, last_row = 100, 900
    first_col, last_col = 100, 600
    num_rows = 6
    num_cols = 4
    rotation = 3     # degrees that grid is rotated
    sd = 3           # percent std dev of avg x and avg y coordinates
    pct_remove = 30  # percent of points to randomly remove from data
    # Create grid
    x = np.linspace(first_col, last_col, num_cols)
    y = np.linspace(first_row, last_row, num_rows)
    xx, yy = np.meshgrid(x, y)
    # Add noise
    x = xx.flatten() + sd * np.mean(xx) * np.random.randn(xx.size) / 100
    y = yy.flatten() + sd * np.mean(yy) * np.random.randn(yy.size) / 100
    # Randomly remove points
    random_list = random.sample(range(0, num_cols*num_rows),
                                int(pct_remove*num_cols*num_rows/100))
    x, y = np.delete(x, random_list), np.delete(y, random_list)
    pts = np.column_stack((x, y))
    # Rotate points
    radians = np.radians(rotation)
    rot_mat = np.array([[np.cos(radians),-np.sin(radians)],
                        [np.sin(radians), np.cos(radians)]])
    einsum = np.einsum('ji, mni -> jmn', rot_mat, [pts])
    pts = np.squeeze(einsum).T
    return np.rint(pts)
Python Code to Fit Gridlines
import numpy as np
import pandas as pd
import itertools
import math
import random
from statsmodels.formula.api import ols
from scipy.spatial import KDTree
import matplotlib.pyplot as plt
def pt_line_dist(pt, ref_line):
    pt1, pt2 = [ref_line[:2], ref_line[2:]]
    # Distance from point to line defined by two other points
    return np.linalg.norm(np.cross(pt1 - pt2, [pt[0],pt[1]])) \
           / np.linalg.norm(pt1 - pt2)
def segment_pts(amts, grp_var, grp_label):
    # Segment on amounts (distances here) in last column of array
    # Note: need to label groups with string for OLS model
    amts = amts[amts[:, -1].argsort()]
    first_amt_in_grp = amts[0][-1]
    group, groups, grp = [], [], 0
    for amt in amts:
        if amt[-1] - first_amt_in_grp > grp_var:
            groups.append(group)
            first_amt_in_grp = amt[-1]
            group = []; grp += 1
        group.append(np.append(amt[:-1],[[grp_label + str(grp)]]))
    groups.append(group)
    return groups
def find_reference_line(pts):
    # Find point with minimum absolute slope relative both min y and max y
    y = np.hsplit(pts, 2)[1]  # y column of array
    m = []
    for i, y_pt in enumerate([ pts[np.argmin(y)], pts[np.argmax(y)] ]):
        m.append(np.zeros((pts.shape[0]-1, 5)))  # dtype default is float64
        m[i][:,2:4] = np.delete(pts, np.where((pts==y_pt).all(axis=1))[0], axis=0)
        m[i][:,4] = abs( (m[i][:,3]-y_pt[1]) / (m[i][:,2]-y_pt[0]) )
        m[i][:,:2] = y_pt
    m = np.vstack((m[0], m[1]))
    return m[np.argmin(m[:,4]), :4]
# Ignore division by zero (slopes of vertical lines)
np.seterr(divide='ignore')
# Create dataset and plot
pts = create_random_example()
plt.scatter(pts[:,0], pts[:,1], c='r') # plot now because pts array changes
# Average distance to the nearest neighbor of each point
tree = KDTree(pts)
nn_avg_dist = np.mean(tree.query(pts, 2)[0][:, 1])
# Find groups of points representing each gridline
groups = []
for orientation in ['o', 'r']:  # original and rotated orientations
    # Rotate points 90 degrees (note: this moves pts to 2nd quadrant)
    if orientation == 'r':
        pts[:,1] = -1 * pts[:,1]
        pts[:, [1, 0]] = pts[:, [0, 1]]
    # Find reference line to compare remaining points for grouping
    ref_line = find_reference_line(pts)  # line is defined by two points
    # Distances between points and reference line
    pt_dists = np.zeros((pts.shape[0], 3))
    pt_dists[:,:2] = pts
    pt_dists[:,2] = np.apply_along_axis(pt_line_dist, 1, pts, ref_line).T
    # Segment pts into groups w.r.t. distances (one group per gridline)
    # Groups have range less than nn_avg_dist.
    groups += segment_pts(pt_dists, 0.7*nn_avg_dist, orientation)
# Create dataframe of groups (OLS model requires a dataframe)
df = pd.DataFrame(np.row_stack(groups), columns=['x', 'y', 'grp'])
df['x'] = pd.to_numeric(df['x'])
df['y'] = pd.to_numeric(df['y'])
# Parallel slopes OLS model
ols_model = ols("y ~ x + grp + 0", data=df).fit()
# OLS parameters
grid_lines = ols_model.params[:-1].to_frame() # panda series to dataframe
grid_lines = grid_lines.rename(columns = {0:'b'})
grid_lines['grp'] = grid_lines.index.str[4:6]
grid_lines['m'] = ols_model.params[-1] # slope
# Rotate the rotated lines back to their original orientation
grid_lines.loc[grid_lines['grp'].str[0] == 'r', 'b'] = grid_lines['b'] / grid_lines['m']
grid_lines.loc[grid_lines['grp'].str[0] == 'r', 'm'] = -1 / grid_lines['m']
# Find grid intersection points by combinations of gridlines
comb = list(itertools.combinations(grid_lines['grp'], 2))
comb = [i for i in comb if i[0][0] != 'r']
comb = [i for i in comb if i[1][0] != 'o']
df_comb = pd.DataFrame(comb, columns=['grp', 'r_grp'])
# Merge gridline parameters with grid points
grid_pts = df_comb.merge(grid_lines.drop_duplicates('grp'),how='left',on='grp')
grid_lines.rename(columns={'grp': 'r_grp'}, inplace=True)
grid_pts.rename(columns={'b':'o_b', 'm': 'o_m', 'grp':'o_grp'}, inplace=True)
grid_pts = grid_pts.merge(grid_lines.drop_duplicates('r_grp'),how='left',on='r_grp')
grid_pts.rename(columns={'b':'r_b', 'm': 'r_m'}, inplace=True)
# Calculate x, y coordinates of gridline interception points
grid_pts['x'] = (grid_pts['r_b']-grid_pts['o_b']) \
/ (grid_pts['o_m']-grid_pts['r_m'])
grid_pts['y'] = grid_pts['o_m'] * grid_pts['x'] + grid_pts['o_b']
# Results output
print(grid_lines)
print(grid_pts)
plt.scatter(grid_pts['x'], grid_pts['y'], s=8, c='b') # for setting axes
axes = plt.gca()
axes.invert_yaxis()
axes.xaxis.tick_top()
axes.set_aspect('equal')
axes.set_xlim(axes.get_xlim())
axes.set_ylim(axes.get_ylim())
x_vals = np.array(axes.get_xlim())
for idx in grid_lines.index:
    y_vals = grid_lines['b'][idx] + grid_lines['m'][idx] * x_vals
    plt.plot(x_vals, y_vals, c='gray')
plt.show()
A numpy implementation of your code can be found below. As the size of AvgGrid is known, I pre-allocate the required memory (rather than append). This should have speed advantages, especially if the number of output vertices is large.
import numpy as np
# Input of [x, y] coordinates of a sparse grid with errors
xys = np.array([[103,101],
[198,103],
[300, 99],
[ 97,205],
[304,202],
[102,295],
[200,303],
[104,405],
[205,394],
[298,401]])
# Function to average
def ColAvgs(CoordinateList, CutoffRatio = 1.1):
    # Length of CoordinateList
    L = len(CoordinateList)
    # Sort input
    SortedList = np.sort(CoordinateList)
    # Determine indices to average
    RelativeIncrease = SortedList[-(L-1):]/SortedList[:(L-1)]
    CriticalIndices = np.flatnonzero(RelativeIncrease > CutoffRatio) + 1
    Indices = np.hstack((0,CriticalIndices))
    if (Indices[-1] != L):
        Indices = np.hstack((Indices,L))
    #print(Indices)  # Uncomment to show index construction
    # Compute averages
    Avgs = np.empty((len(Indices)-1)); Avgs[:] = np.NaN
    for iter in range(len(Avgs)):
        Avgs[iter] = int( round(np.mean(SortedList[Indices[iter]:Indices[(iter+1)]]) ) )
    # Return output
    return Avgs
# Compute x- and y-coordinates of vertices
AvgsXcoord = ColAvgs(xys[:,0])
AvgsYcoord = ColAvgs(xys[:,1])
# Return all vertices
AvgGrid = np.empty((len(AvgsXcoord)*len(AvgsYcoord),2)); AvgGrid[:] = np.NaN
iter = 0
for y in AvgsYcoord:
    for x in AvgsXcoord:
        AvgGrid[iter, :] = np.hstack((x,y))
        iter = iter+1
print(AvgGrid)
If you project all points onto a vertical or horizontal axis, the problem turns into one of clustering with equally spaced clusters.
To perform these clusterings, you can consider the distances between the successive (sorted) points. They will form two clusters: short distances corresponding to noise, and longer ones for the grid size. You can solve the two-way clustering using the Otsu method.
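A rough sketch of that idea, with a plain Otsu threshold applied to the gaps between the sorted projections (function name and bin count are just illustrative):
import numpy as np

def otsu_threshold(values, nbins=64):
    # Classic Otsu: choose the cut that maximizes the between-class variance
    hist, edges = np.histogram(values, bins=nbins)
    hist = hist.astype(float)
    centers = 0.5 * (edges[:-1] + edges[1:])
    total = hist.sum()
    best_cut, best_var = centers[0], -1.0
    for i in range(1, nbins):
        w0, w1 = hist[:i].sum(), hist[i:].sum()
        if w0 == 0 or w1 == 0:
            continue
        m0 = (hist[:i] * centers[:i]).sum() / w0
        m1 = (hist[i:] * centers[i:]).sum() / w1
        var_between = w0 * w1 * (m0 - m1) ** 2 / total ** 2
        if var_between > best_var:
            best_var, best_cut = var_between, centers[i]
    return best_cut

xs = np.sort([103, 198, 300, 97, 304, 102, 200, 104, 205, 298])  # x-projections
gaps = np.diff(xs)
cut = otsu_threshold(gaps)
labels = np.concatenate(([0], np.cumsum(gaps > cut)))  # column index of each sorted x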

Calculating mean value of a 2D array as a function of distance from the center in Python

I'm trying to calculate the mean value of a quantity (in the form of a 2D array) as a function of its distance from the center of a 2D grid. I understand that the idea is to identify all the array elements that are at a distance R from the center, then add them up and divide by the number of elements. However, I'm having trouble actually identifying an algorithm to go about doing this.
I have attached a working example of the code to generate the 2d array below. The code is for calculating some quantities that are resultant from gravitational lensing, so the way the array is made is irrelevant to this problem, but I have attached the entire code so that you could create the output array for testing.
import numpy as np
import multiprocessing
import matplotlib.pyplot as plt
n = 100 # grid size
c = 3e8
G = 6.67e-11
M_sun = 1.989e30
pc = 3.086e16 # parsec
Dds = 625e6*pc
Ds = 1726e6*pc #z=2
Dd = 1651e6*pc #z=1
FOV_arcsec = 0.0001
FOV_arcmin = FOV_arcsec/60.
pix2rad = ((FOV_arcmin/60.)/float(n))*np.pi/180.
rad2pix = 1./pix2rad
Renorm = (4*G*M_sun/c**2)*(Dds/(Dd*Ds))
#stretch = [10, 2]
# To create a random distribution of points
def randdist(PDF, x, n):
    #Create a distribution following PDF(x). PDF and x
    #must be of the same length. n is the number of samples
    fp = np.random.rand(n,)
    CDF = np.cumsum(PDF)
    return np.interp(fp, CDF, x)
def get_alpha(args):
    zeta_list_part, M_list_part, X, Y = args
    alpha_x = 0
    alpha_y = 0
    for key in range(len(M_list_part)):
        z_m_z_x = (X - zeta_list_part[key][0])*pix2rad
        z_m_z_y = (Y - zeta_list_part[key][1])*pix2rad
        alpha_x += M_list_part[key] * z_m_z_x / (z_m_z_x**2 + z_m_z_y**2)
        alpha_y += M_list_part[key] * z_m_z_y / (z_m_z_x**2 + z_m_z_y**2)
    return (alpha_x, alpha_y)
if __name__ == '__main__':
    # number of processes, scale accordingly
    num_processes = 1  # Number of CPUs to be used
    pool = multiprocessing.Pool(processes=num_processes)
    num = 100  # The number of points/microlenses
    r = np.linspace(-n, n, n)
    PDF = np.abs(1/r)
    PDF = PDF/np.sum(PDF)  # PDF should be normalized
    R = randdist(PDF, r, num)
    Theta = 2*np.pi*np.random.rand(num,)
    x1 = [R[k]*np.cos(Theta[k])*1 for k in range(num)]
    y1 = [R[k]*np.sin(Theta[k])*1 for k in range(num)]
    # Uniform distribution
    #R = np.random.uniform(-n,n,num)
    #x1 = np.random.uniform(-n,n,num)
    #y1 = np.random.uniform(-n,n,num)
    zeta_list = np.column_stack((np.array(x1), np.array(y1)))  # List of coordinates for the microlenses
    x = np.linspace(-n,n,n)
    y = np.linspace(-n,n,n)
    X, Y = np.meshgrid(x,y)
    M_list = np.array([0.1 for i in range(num)])
    # split zeta_list, M_list, X, and Y
    zeta_list_split = np.array_split(zeta_list, num_processes, axis=0)
    M_list_split = np.array_split(M_list, num_processes)
    X_list = [X for e in range(num_processes)]
    Y_list = [Y for e in range(num_processes)]
    alpha_list = pool.map(
        get_alpha, zip(zeta_list_split, M_list_split, X_list, Y_list))
    alpha_x = 0
    alpha_y = 0
    for e in alpha_list:
        alpha_x += e[0]
        alpha_y += e[1]
    alpha_x_y = 0
    alpha_x_x = 0
    alpha_y_y = 0
    alpha_y_x = 0
    alpha_x_y, alpha_x_x = np.gradient(alpha_x*rad2pix*Renorm,edge_order=2)
    alpha_y_y, alpha_y_x = np.gradient(alpha_y*rad2pix*Renorm,edge_order=2)
    det_A = 1 - alpha_y_y - alpha_x_x + (alpha_x_x)*(alpha_y_y) - (alpha_x_y)*(alpha_y_x)
    abs = np.absolute(det_A)
    I = abs**(-1.)
    O = np.log10(I+1)
    plt.contourf(X,Y,O,100)
The array of interest is O, and I have attached a plot of how it should look. It can differ based on the random distribution of points.
What I'm trying to do is to plot the mean values of O as a function of radius from the center of the grid. In the end, I want to be able to plot the average O as a function of distance from center in a 2d line graph. So I suppose the first step is to define circles of radius R, based on X and Y.
def circle(x,y):
    r = np.sqrt(x**2 + y**2)
    return r
Now I just have to figure out a way to find all the values of O that have the same indices as the equivalent values of R. I'm a bit confused about this part and would appreciate any help.
You can find the geometric coordinates of a circle with center (0,0) and radius R as such:
phi = np.linspace(0, 1, 50)
x = R*np.cos(2*np.pi*phi)
y = R*np.sin(2*np.pi*phi)
These values, however, will not fall on the regular pixel grid but in between pixels.
In order to use them as sampling points you can either round the values and use them as indexes, or interpolate the values from the nearby pixels.
Attention: The pixel indexes and the x, y are not the same. In your example (0,0) is at the picture location (50,50).
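A rough sketch of the rounding variant described above (illustrative names; the centre is taken as the middle of the array, i.e. picture location (50, 50) for the grid in the question):
import numpy as np

def radial_mean(O, R, n_samples=360):
    cy, cx = O.shape[0] // 2, O.shape[1] // 2   # picture location of (0, 0)
    phi = np.linspace(0.0, 2.0 * np.pi, n_samples, endpoint=False)
    ix = np.rint(cx + R * np.cos(phi)).astype(int)
    iy = np.rint(cy + R * np.sin(phi)).astype(int)
    keep = (ix >= 0) & (ix < O.shape[1]) & (iy >= 0) & (iy < O.shape[0])
    return O[iy[keep], ix[keep]].mean()

# radii = np.arange(1, O.shape[0] // 2)
# profile = [radial_mean(O, R) for R in radii]
# plt.plot(radii, profile)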

Trilinear Interpolation on Voxels at specific angle

I'm currently attempting to implement this algorithm for volume rendering in Python, and am conceptually confused about their method of generating the LH histogram (see section 3.1, page 4).
I have a 3D stack of DICOM images, and have calculated its gradient magnitude and the two corresponding azimuth and elevation angles (which I found out about here), as well as the second derivative.
Now, the algorithm is asking me to iterate through a set of voxels, and "track a path by integrating the gradient field in both directions...using the second order Runge-Kutta method with an integration step of one voxel".
What I don't understand is how to use the 2 angles I calculated to integrate the gradient field in said direction. I understand that you can use trilinear interpolation to get intermediate voxel values, but I don't understand how to get the voxel coordinates I want using the angles I have.
In other words, I start at a given voxel position, and want to take a 1 voxel step in the direction of the 2 angles calculated for that voxel (one in the x-y direction, the other in the z-direction). How would I take this step at these 2 angles and retrieve the new (x, y, z) voxel coordinates?
Apologies in advance, as I have a very basic background in Calc II/III, so vector fields/visualization of 3D spaces is still a little rough for me.
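Not a full answer to the integration question, but a sketch of the single step itself, assuming the usual convention that the azimuth is measured in the x-y plane and the elevation from that plane towards z (names are illustrative):
import numpy as np

def step_from_angles(pos, azimuth_deg, elevation_deg, step=1.0):
    az, el = np.radians(azimuth_deg), np.radians(elevation_deg)
    # unit direction vector built from the two angles
    direction = np.array([np.cos(el) * np.cos(az),
                          np.cos(el) * np.sin(az),
                          np.sin(el)])
    return np.asarray(pos, dtype=float) + step * direction   # new (x, y, z) position
The resulting coordinates are generally non-integer, which is where the trilinear interpolation of the gradient field would come in.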
Creating 3D stack of DICOM images:
def collect_data(data_path):
    print "collecting data"
    files = []  # create an empty list
    for dirName, subdirList, fileList in os.walk(data_path):
        for filename in fileList:
            if ".dcm" in filename:
                files.append(os.path.join(dirName,filename))
    # Get reference file
    ref = dicom.read_file(files[0])
    # Load dimensions based on the number of rows, columns, and slices (along the Z axis)
    pixel_dims = (int(ref.Rows), int(ref.Columns), len(files))
    # Load spacing values (in mm)
    pixel_spacings = (float(ref.PixelSpacing[0]), float(ref.PixelSpacing[1]), float(ref.SliceThickness))
    x = np.arange(0.0, (pixel_dims[0]+1)*pixel_spacings[0], pixel_spacings[0])
    y = np.arange(0.0, (pixel_dims[1]+1)*pixel_spacings[1], pixel_spacings[1])
    z = np.arange(0.0, (pixel_dims[2]+1)*pixel_spacings[2], pixel_spacings[2])
    # Row and column directional cosines
    orientation = ref.ImageOrientationPatient
    # This will become the intensity values
    dcm = np.zeros(pixel_dims, dtype=ref.pixel_array.dtype)
    origins = []
    # loop through all the DICOM files
    for filename in files:
        # read the file
        ds = dicom.read_file(filename)
        # get pixel spacing and origin information
        origins.append(ds.ImagePositionPatient)  # [0,0,0] coordinates in real 3D space (in mm)
        # store the raw image data
        dcm[:, :, files.index(filename)] = ds.pixel_array
    return dcm, origins, pixel_spacings, orientation
Calculating gradient magnitude:
def calculate_gradient_magnitude(dcm):
    print "calculating gradient magnitude"
    gradient_magnitude = []
    gradient_direction = []
    gradx = np.zeros(dcm.shape)
    sobel(dcm,0,gradx)
    grady = np.zeros(dcm.shape)
    sobel(dcm,1,grady)
    gradz = np.zeros(dcm.shape)
    sobel(dcm,2,gradz)
    gradient = np.sqrt(gradx**2 + grady**2 + gradz**2)
    azimuthal = np.arctan2(grady, gradx)
    elevation = np.arctan(gradz,gradient)
    azimuthal = np.degrees(azimuthal)
    elevation = np.degrees(elevation)
    return gradient, azimuthal, elevation
Converting to patient coordinate system to get actual voxel position:
def get_patient_position(dcm, origins, pixel_spacing, orientation):
    """
    Image Space --> Anatomical (Patient) Space is an affine transformation
    using the Image Orientation (Patient), Image Position (Patient), and
    Pixel Spacing properties from the DICOM header
    """
    print "getting patient coordinates"
    world_coordinates = np.empty((dcm.shape[0], dcm.shape[1],dcm.shape[2], 3))
    affine_matrix = np.zeros((4,4), dtype=np.float32)
    rows = dcm.shape[0]
    cols = dcm.shape[1]
    num_slices = dcm.shape[2]
    image_orientation_x = np.array([ orientation[0], orientation[1], orientation[2] ]).reshape(3,1)
    image_orientation_y = np.array([ orientation[3], orientation[4], orientation[5] ]).reshape(3,1)
    pixel_spacing_x = pixel_spacing[0]
    # Construct affine matrix
    # Method from:
    # http://nipy.org/nibabel/dicom/dicom_orientation.html
    T_1 = origins[0]
    T_n = origins[num_slices-1]
    affine_matrix[0,0] = image_orientation_y[0] * pixel_spacing[0]
    affine_matrix[0,1] = image_orientation_x[0] * pixel_spacing[1]
    affine_matrix[0,3] = T_1[0]
    affine_matrix[1,0] = image_orientation_y[1] * pixel_spacing[0]
    affine_matrix[1,1] = image_orientation_x[1] * pixel_spacing[1]
    affine_matrix[1,3] = T_1[1]
    affine_matrix[2,0] = image_orientation_y[2] * pixel_spacing[0]
    affine_matrix[2,1] = image_orientation_x[2] * pixel_spacing[1]
    affine_matrix[2,3] = T_1[2]
    affine_matrix[3,3] = 1
    k1 = (T_1[0] - T_n[0])/ (1 - num_slices)
    k2 = (T_1[1] - T_n[1])/ (1 - num_slices)
    k3 = (T_1[2] - T_n[2])/ (1 - num_slices)
    affine_matrix[:3, 2] = np.array([k1,k2,k3])
    for z in range(num_slices):
        for r in range(rows):
            for c in range(cols):
                vector = np.array([r, c, 0, 1]).reshape((4,1))
                result = np.matmul(affine_matrix, vector)
                result = np.delete(result, 3, axis=0)
                result = np.transpose(result)
                world_coordinates[r,c,z] = result
        # print "Finished slice ", str(z)
    # np.save('./data/saved/world_coordinates_3d.npy', str(world_coordinates))
    return world_coordinates
Now I'm at the point where I want to write this function:
def create_lh_histogram(patient_positions, dcm, magnitude, azimuthal, elevation):
    print "constructing LH histogram"
    # Get 2nd derivative
    second_derivative = gaussian_filter(magnitude, sigma=1, order=1)
    # Determine if voxels lie on boundary or not (thresholding)
    # Still have to code out: let's say the thresholded voxels are in
    # a numpy array called voxels
    # Iterate through all thresholded voxels and integrate gradient field in
    # both directions using 2nd-order Runge-Kutta
    vox_it = voxels.nditer(voxels, flags=['multi_index'])
    while not vox_it.finished:
        # ???

Fast 3 to 7 D interpolation on non uniform & non rectangular grid

[COMPLETELY REWRITTEN]
I'm looking for a way of interpolating a large set of data in dimensions ranging from 3 to 7.
The data is, by nature, on a non-rectangular grid and non-uniformly spaced.
I looked at every option I could think of (griddata, KDTree + magic, linear interpolation, reworked map_coordinates...): the fastest and most usable tool seems to be Scipy's LinearNDInterpolator class. Linear interpolation in such a high-dimensional space is fine and should be precise enough.
However, there is one big shortcoming with this class: data with gaps or "concave regions" will produce extrapolated results when I want only interpolation.
This is best seen with some pictures (2-D test). In the following I produce some randomly generated data for X, and VALUE, while Y is upper-bounded by a function of X (just so I create gaps).
After rescaling the data (mostly done using pieces of code of the LinearNDInterpolator from the master, i.e. development, branch), the Delaunay triangulation will produce a convex hull that includes the gap, and will "extrapolate" in this region. The term "extrapolate" is not really correct here, in a technical sense, but I think it is appropriate given that the original data is assumed to be sufficiently well sampled, so that the big gaps mean "no data allowed" (not physical).
To start handling the problem, I "tagged" every Delaunay (hyper-)triangle whose (hyper-)volume is higher than a user-defined threshold (by default the volume equivalent to 5% of the data extent in each dimension).
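A compact, standalone sketch of that tagging step (the full class further down does the same thing with more bookkeeping; the function name here is just illustrative):
import numpy as np
from math import factorial
from scipy.spatial import Delaunay

def large_simplex_mask(points, fraction=0.05):
    # tag simplices whose hyper-volume exceeds the volume of a box spanning
    # `fraction` of the data extent in every dimension
    points = np.asarray(points, dtype=float)
    tri = Delaunay(points)
    ndim = points.shape[1]
    verts = tri.points[tri.simplices]              # (nsimplex, ndim+1, ndim)
    vecs = verts[:, :-1, :] - verts[:, -1:, :]     # edge vectors from the last vertex
    vols = np.abs(np.linalg.det(vecs)) / factorial(ndim)
    vol_max = np.prod(points.ptp(axis=0) * fraction)
    return vols > vol_max, tri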
Generating random data, and evaluating the values using this technique would produce the following figure:
Black dots (with red or white rings) are the randomly generated data to be evaluated. Red rings indicate points that are rejected (i.e. value = NaN) by my custom class based on LinearNDInterpolator, and white rings show accepted points.
For clarity I've plotted triangles that have been rejected from the original Delaunay triangulation.
As you can see, there are still some white-ringed points that fall in the gap, which I do not want. This is because the simplex to which they belong has a volume less than the authorized maximum volume (some of these triangles even appear as lines on the figure, so they are hard to see).
My question is: how could I improve from here? What could be done?
I was thinking of grabbing all the original data points that fall in a small ball around each evaluated point, and checking whether there are any. But this is not a good solution, since it would be resource-consuming and not precise enough (e.g. what about points very close to the bottom of the gap, but still outside the upper envelope?).
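For what it is worth, a bare-bones version of that "small ball" check with a KD-tree (the radius is a free parameter, and as noted above it only approximates the envelope):
import numpy as np
from scipy.spatial import cKDTree

def reject_far_points(data_points, eval_points, radius):
    # True where an evaluated point has no original data point within `radius`
    dist, _ = cKDTree(data_points).query(eval_points, k=1)
    return np.asarray(dist) > radius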
Here is my custom interpolation module I used:
#!/usr/bin/env python
"""
Custom N-D linear interpolation class, based on scipy's LinearNDInterpolator.
The main differences are:
- auto-scaling
- interpolation: inside convex hull (normal behavior), and "close enough" to original data.
This rejects points that would normally be interpolated by LinearNDInterpolator.
"""
# ================
# Python modules
# ================
import cPickle
import numpy as np
from scipy.spatial import Delaunay
from scipy.interpolate import LinearNDInterpolator
from scipy.misc import factorial
# =======================
# Convenience functions
# =======================
def _inv_log10(x):
    return 10**x

def det(coords):  #, n):
    """
    Return the determinant of the given coordinates (not the usual determinant, but the one used to compute
    the hyper-volume of an hyper-triangle)
    From a Delaunay triangulation, the coordinates of one simplex (ie. hyper-triangle) is given by:
        coords_i = tri.points[simplex_i]
    where
        tri = Delaunay(points)
        simplex_i = tri.simplices[i]
    In an N-dimensional space, the simplex will have N+1 points, each one of them of dimension N.
    Eg. in 3D, a point i has coordinates pi = (xi, yi, zi). Therefore p1 = point 1 = (x1, y1, z1)
              |x1 x2 x3 x4|
              |y1 y2 y3 y4|   |(x1-x4) (x2-x4) (x3-x4)|
        det = |z1 z2 z3 z4| = |(y1-y4) (y2-y4) (y3-y4)|
              |1  1  1  1 |   |(z1-z4) (z2-z4) (z3-z4)|
    """
    # assert n == len(coords[0]), 'number of dimensions of coordinates (%d) != %d' % (len(coords[0]), n)
    q = coords[:-1, :] - coords[-1, None, :]
    sign, logdet = np.linalg.slogdet(q)
    return sign * np.exp(logdet)
# ==============================
# LinearNDInterpolator wrapper
# ==============================
class Interp(object):
    """
    Simple wrapper around LinearNDInterpolator.
    """
    def __init__(self, points, values, **kwargs):
        """
        :param points: list of coordinates (eg. [(0, 1), (0, 3), (4, 4.5)] for 3 points in 2-D)
        :param values: list of associated value(s) for each point (eg. [1, 2, 3] for 3 points of single value)
        :keyword rescale: rescale data points so that the final extents is [0, 1] in every dimensions
        :keyword transform: transform data points (prior to rescaling). If True, automatically transform dimension coordinates
                            if extents span more than 2 order of magnitudes. It can also be a list of tuples of
                            (transformation function, inverse function), that will be applied whenever needed.
        :keyword fill_value: outside bounds interpolation values (default: np.nan)
        """
        try:
            points = np.asanyarray(points, dtype=np.float64)
            values = np.asanyarray(values, dtype=np.float64)
        except ValueError:
            raise ValueError('Cannot convert input points to an array of floats')
        # dimensions / number of points and values
        self.ndim = points.shape[1]
        self.nvalues = values.shape[1]
        self.npoints = points.shape[0]
        # locals
        self._idims = range(self.ndim)
        # extents
        self.minis = np.min(points, axis=0)
        self.maxis = np.max(points, axis=0)
        self.ranges = self.maxis - self.minis
        self.magnitudes = self.maxis / self.minis
        # options
        rescale = kwargs.pop('rescale', True)
        transform = kwargs.pop('transform', True)
        fill_value = kwargs.pop('fill_value', np.nan)
        # transformation
        if transform:
            transforms = []
            if transform is True:
                # automatic transformation -> if extent >= 2 order of magnitudes: f(x) = log10(x)
                for i, e in enumerate(self.magnitudes):
                    if e >= 100.:
                        transforms.append((np.log10, _inv_log10))
                    else:
                        transforms.append(None)
                if not transforms:
                    transforms = None
            else:
                err_msg = 'transform: both the transformation function and its inverse must be given in a tuple'
                if not isinstance(transform, (tuple, list)):
                    raise ValueError(err_msg)
                if (self.ndim > 1) and (len(transform) != self.ndim):
                    raise ValueError('transform: None or transformations tuple must be given for every dimension')
                for t in transform:
                    if not isinstance(t, (tuple, list)):
                        raise ValueError(err_msg)
                    elif t is None:
                        transforms.append(None)
                    else:
                        transforms.append(t)
            self.transforms = transforms
        else:
            self.transforms = None
        points = self._transform(points)
        # scaling
        self.offset = 0.
        self.scale = 1.
        self.rescale = rescale
        if rescale:
            self.offset = np.mean(points, axis=0)
            self.scale = (points - self.offset).ptp(axis=0)
            self.scale[~(self.scale > 0)] = 1.0  # avoid division by 0
        points = self._rescale(points)
        # triangulation
        self.tri = self._triangulate(points)
        # volumes
        self.fact = 1. / factorial(self.ndim)
        self.volume_max = np.product(self.tri.points.ptp(axis=0) * 0.05)  # 5% peak-to-peak in each dimension
        self.rej_idx = None
        self.rej_vol = None
        self.cached_rej = False
        # linear interpolation
        self.fill_value = fill_value
        self.func = LinearNDInterpolator(self.tri, values, fill_value=fill_value)
    def _triangulate(self, points, **kwargs):
        """
        Delaunay triangulation
        """
        return Delaunay(points, **kwargs)

    def _get_volume_simplex(self, point):
        """
        Compute the simplex volume of the given point
        """
        i = self.tri.find_simplex(point)
        idx = self.tri.simplices[i]
        return np.abs(self.fact * det(self.tri.points[idx]))
    def cache_rejected_triangles(self, p=None, check_min=False):
        """
        Cache the indexes of rejected triangles.
        OPTIONS
        p         -- peak-to-peak percentage in each dimension for the maximum volume calculation
                     Default: None (default at __init__: p = 0.05)
                     Type: float (0 < p <= 1)
                     Type: list of floats (length = # dimensions)
        check_min -- check that the minimum spacing in each dimension is at least equal to p * extent
                     Default: False
                     Warning: *p* must be given
        """
        self.cached_rej = True
        if p is not None:
            p = np.array(p)
            # update the maximum hyper-triangle volume (p % of the extent in each dimension)
            self.volume_max = np.product(self.tri.points.ptp(axis=0) * p)
        if check_min:
            assert p is not None, 'You must give *p* parameter for checking minimum volume of hyper-triangle'
            ptps = self.tri.points.ptp(axis=0)
            ps = np.ones(self.ndim) * p
            n_up = 0
            for i in self._idims:
                _x = np.unique(self.tri.points[:, i])
                mini = np.min(_x[1:] - _x[:-1])
                if mini > (ptps[i] * ps[i]):
                    n_up += 1
                    print 'WARNING: changed max. volume axis of dim. %d from %.3g to %.3g' % (i+1, ps[i], mini)
                    ps[i] = mini
            if n_up:
                new_vol = np.product(ptps * ps)
                print 'CHANGE: old volume was = %.3g, and is now = %.3g' % (self.volume_max, new_vol)
                self.volume_max = new_vol
        rej_idx = []
        rej_vol = []
        for i, simplex in enumerate(self.tri.simplices):
            vol = np.abs(self.fact * det(self.tri.points[simplex]))
            if vol > self.volume_max:
                rej_idx.append(i)
                rej_vol.append(vol)
        self.rej_idx = np.array(rej_idx)
        self.rej_vol = np.array(rej_vol)
    def _transform(self, points, inverse=False):
        """
        Transform point coordinates using functions. Set 'inverse' to True to transform back.
        """
        if self.transforms is not None:
            j = 1 - int(inverse)
            for i in self._idims:
                t = self.transforms[i]
                if t is None:
                    continue
                points[:, i] = t[j](points[:, i])
        return points

    def _rescale(self, points, inverse=False):
        """
        Rescale point coordinates so that extents in each dimensions span [0, 1]. Set 'inverse' to True to scale back.
        """
        if self.rescale:
            if inverse:
                points = points * self.scale + self.offset
            else:
                points = (points - self.offset) / self.scale
        return points
    def _check(self, x, res):
        """
        Check that interpolation results are close enough to real data and have not been extrapolated.
        """
        points = np.asanyarray(x)
        if points.ndim == 1:
            # only 1 point
            values = np.asanyarray(res).reshape(1, self.ndim)
        else:
            # more than 1 point
            values = np.asanyarray(res).reshape(points.shape[0], self.ndim)
        if self.cached_rej:
            idx = np.unique(np.where(np.isfinite(values))[0])
            ui_tri, uii = np.unique(self.tri.find_simplex(points[idx]), return_inverse=True)
            umask = np.lib.arraysetops.in1d(ui_tri, self.rej_idx, assume_unique=True)
            mask = umask[uii]
            values[idx[mask], :] = self.fill_value
        else:
            for i, v in enumerate(values):
                if not np.isnan(v[0]):
                    vol = self._get_volume_simplex(points[i])
                    if vol > self.volume_max:
                        # reject
                        values[i][:] = self.fill_value
        return values.reshape(res.shape)
    def __call__(self, x, check=False):
        """
        Interpolate. If 'check' is True, check that interpolated points are close enough to real data.
        """
        _x = self._rescale(self._transform(x))
        res = self.func(_x)
        if check:
            res = self._check(_x, res)
        return res

    def ev(self, x, check=False):
        """
        Alias for __call__
        """
        return self.__call__(x, check=check)

    def get_original_points(self):
        """
        Return original points
        """
        return self._transform(self._rescale(self.func.points, inverse=True), inverse=True)

    def get_original_values(self):
        """
        Return original values
        """
        return self.func.values
# ===========================
# Save / load interpolation
# ===========================
def save(filename, interp):
    """
    Dump the Interp instance to a binary file with cPickle (protocol 2)
    """
    with open(filename, 'wb') as f:
        cPickle.dump(interp, f, protocol=2)

def load(filename):
    """
    Load a previously saved (cPickled with save_interp function) Interp instance
    """
    with open(filename, 'rb') as f:
        interp = cPickle.load(f)
    return interp
And the test script:
#!/usr/bin/env python
"""
Test the custom interpolation class (see interp.py)
"""
import sys
import numpy as np
from interp import Interp
import matplotlib.pyplot as plt
# generate random data
n = 2000 # number of generated points
x = np.random.random(n)
def f(v):
    maxi = v ** (1/(v+1e-5)) * (v - 5.) ** 2 - np.exp(v-7) + 1
    return np.random.random() * maxi
y = map(f, x * 10)
z = np.random.random(n)
points = np.array((x, y)).T
values = np.random.random(points.shape)
# create interpolation function
func = Interp(points, values, transform=False)
func.cache_rejected_triangles(p=0.05, check_min=True)
# generate random data + evaluate
pts = np.random.random((500, points.shape[1]))
pts *= points.ptp(0)
pts += points.min(0)
res = func(pts, check=True)
# rejected points indexes
idx_rej = np.unique(np.where(np.isnan(res))[0])
n_rej = len(idx_rej)
print '%d points (%.0f%%) have been rejected' % (n_rej, 100.*n_rej/pts.shape[0])
# plot rejected triangles
fig = plt.figure()
ax = plt.gca()
for i in func.rej_idx:
    _x = [p for p in points[func.tri.simplices[i], 0]]
    _x += [points[func.tri.simplices[i][0], 0]]
    _y = [p for p in points[func.tri.simplices[i], 1]]
    _y += [points[func.tri.simplices[i][0], 1]]
    ax.plot(_x, _y, c='k', ls='-', zorder=100)
# plot original data
ax.scatter(points[:, 0], points[:, 1], c='b', linewidths=0, s=20, zorder=50)
# plot all points (both accepted and rejected): in white
ax.scatter(pts[:, 0], pts[:, 1], c='k', edgecolors='w', linewidths=1, zorder=150, s=30)
# re-plot rejected points: in red
ax.scatter(pts[idx_rej, 0], pts[idx_rej, 1], c='k', edgecolors='r', linewidths=1, zorder=200, s=30)
fig.savefig('img_tri.png', transparent=True, dpi=300)
