I'm trying to create a Voronoi diagram given a set of scatterplot points. However, several "extra unintended lines" appear to get calculated in the process. Some of these "extra" lines appear to be the infinite edges getting incorrectly calculated. But others are appearing randomly in the middle of the plot as well. How can I only create an extra edge when it's needed/required to connect a polygon to the edge of the plot (e.g. plot boundaries)?
My graph outer boundaries are:
boundaries = np.array([[0, -2], [0, 69], [105, 69], [105, -2], [0, -2]])
Here's the section dealing with the voronoi diagram creation:
def voronoi_polygons(voronoi, diameter):
centroid = voronoi.points.mean(axis=0)
ridge_direction = defaultdict(list)
for (p, q), rv in zip(voronoi.ridge_points, voronoi.ridge_vertices):
u, v = sorted(rv)
if u == -1:
t = voronoi.points[q] - voronoi.points[p] # tangent
n = np.array([-t[1], t[0]]) / np.linalg.norm(t) # normal
midpoint = voronoi.points[[p, q]].mean(axis=0)
direction = np.sign(np.dot(midpoint - centroid, n)) * n
ridge_direction[p, v].append(direction)
ridge_direction[q, v].append(direction)
for i, r in enumerate(voronoi.point_region):
region = voronoi.regions[r]
if -1 not in region:
# Finite region.
yield Polygon(voronoi.vertices[region])
continue
# Infinite region.
inf = region.index(-1) # Index of vertex at infinity.
j = region[(inf - 1) % len(region)] # Index of previous vertex.
k = region[(inf + 1) % len(region)] # Index of next vertex.
if j == k:
# Region has one Voronoi vertex with two ridges.
dir_j, dir_k = ridge_direction[i, j]
else:
# Region has two Voronoi vertices, each with one ridge.
dir_j, = ridge_direction[i, j]
dir_k, = ridge_direction[i, k]
# Length of ridges needed for the extra edge to lie at least
# 'diameter' away from all Voronoi vertices.
length = 2 * diameter / np.linalg.norm(dir_j + dir_k)
# Polygon consists of finite part plus an extra edge.
finite_part = voronoi.vertices[region[inf + 1:] + region[:inf]]
extra_edge = [voronoi.vertices[j] + dir_j * length,
voronoi.vertices[k] + dir_k * length]
combined_finite_edge = np.concatenate((finite_part, extra_edge))
poly = Polygon(combined_finite_edge)
yield poly
Here are the points being used:
['52.629' '24.28099822998047']
['68.425' '46.077999114990234']
['60.409' '36.7140007019043']
['72.442' '28.762001037597656']
['52.993' '43.51799964904785']
['59.924' '16.972000122070312']
['61.101' '55.74899959564209']
['68.9' '13.248001098632812']
['61.323' '29.0260009765625']
['45.283' '36.97500038146973']
['52.425' '19.132999420166016']
['37.739' '28.042999267578125']
['48.972' '2.3539962768554688']
['33.865' '30.240001678466797']
['52.34' '64.94799995422363']
['52.394' '45.391000747680664']
['52.458' '34.79800033569336']
['31.353' '43.14500045776367']
['38.194' '39.24399948120117']
['98.745' '32.15999984741211']
['6.197' '32.606998443603516']
Most likely this is due to the errors associated with floating point arithmetic while computing the voronoi traingulation from your data (esp. the second column).
Assuming that, there is no single solution for such kinds of problems. I urge you to go through this page* of the Qhull manual and try iterating through those parameters in qhull_options before generating the voronoi object that you are inputting in the function. An example would be qhull_options='Qbb Qc Qz QJ'.
Other than that I doubt there is anything that could be modified in the function to avoid such a problem.
*This will take some time though. Just be patient.
Figured out what was wrong: after each polygon I needed to add a null x and y value or else it would attempt to 'stitch' one polygon to another, drawing an additional unintended line in order to do so. So the data should really look more like this:
GameTime,Half,ObjectType,JerseyNumber,X,Y,PlayerIDEvent,PlayerIDTracking,MatchIDEvent,Position,teamId,i_order,v_vor_x,v_vor_y
0.0,1,1,22,None,None,578478,794888,2257663,3,35179.0,0,22.79645297,6.20866756
0.0,1,1,22,None,None,578478,794888,2257663,3,35179.0,1,17.63464264,3.41230187
0.0,1,1,22,None,None,578478,794888,2257663,3,35179.0,2,20.27639318,34.29191902
0.0,1,1,22,None,None,578478,794888,2257663,3,35179.0,3,32.15600546,36.60432421
0.0,1,1,22,None,None,578478,794888,2257663,3,35179.0,4,38.34639812,33.62806739
0.0,1,1,22,None,None,578478,794888,2257663,3,35179.0,5,22.79645297,6.20866756
0.0,1,1,22,None,None,578478,794888,2257663,3,35179.0,5,nan,nan
0.0,1,1,22,33.865,30.240001678466797,578478,794888,2257663,3,35179.0,,,
0.0,1,0,92,None,None,369351,561593,2257663,1,32446.0,0,46.91696938,29.44801535
0.0,1,0,92,None,None,369351,561593,2257663,1,32446.0,1,55.37574848,29.5855499
0.0,1,0,92,None,None,369351,561593,2257663,1,32446.0,2,58.85876401,23.20381766
0.0,1,0,92,None,None,369351,561593,2257663,1,32446.0,3,57.17455086,21.5228301
0.0,1,0,92,None,None,369351,561593,2257663,1,32446.0,4,44.14237744,22.03925667
0.0,1,0,92,None,None,369351,561593,2257663,1,32446.0,5,45.85962774,28.83613332
0.0,1,0,92,None,None,369351,561593,2257663,1,32446.0,5,nan,nan
0.0,1,0,92,52.629,24.28099822998047,369351,561593,2257663,1,32446.0,,,
0.0,1,0,27,None,None,704169,704169,2257663,2,32446.0,0,65.56965667,33.4292025
0.0,1,0,27,None,None,704169,704169,2257663,2,32446.0,1,57.23303682,32.43809027
0.0,1,0,27,None,None,704169,704169,2257663,2,32446.0,2,55.65704152,38.97814049
0.0,1,0,27,None,None,704169,704169,2257663,2,32446.0,3,60.75304149,44.53251169
0.0,1,0,27,None,None,704169,704169,2257663,2,32446.0,4,65.14170295,40.77562188
0.0,1,0,27,None,None,704169,704169,2257663,2,32446.0,5,65.56965667,33.4292025
0.0,1,0,27,None,None,704169,704169,2257663,2,32446.0,5,nan,nan
I am converting mesh-grid points (2D Maze) into Adjacency matrix which I will use later to find the shortest path between the given coordinates. Please have a look into my code:
import numpy as np
import numpy.matlib #for matrices
def cutblocks(xv,yv,allBlocks):
x_new,y_new = np.copy(xv), np.copy(yv)
ori_x,ori_y = xv[0][0], yv[0][0]
for block in allBlocks:
block = sorted(block)
block_xmin = np.min((block[0][0], block[2][0]))
block_xmax = np.max((block[0][0], block[2][0]))
block_ymin = np.min((block[0][1], block[1][1]))
block_ymax = np.max((block[0][1], block[1][1]))
rx_min, rx_max = int((block_xmin-ori_x)/stepSize)+1, int((block_xmax-ori_x)/stepSize)+1
ry_min, ry_max = int((block_ymin-ori_y)/stepSize)+1, int((block_ymax-ori_y)/stepSize)+1
for i in range(rx_min,rx_max):
for j in range(ry_min,ry_max):
x_new[j][i] = np.nan
for i in range(ry_min,ry_max):
for j in range(rx_min,rx_max):
y_new[i][j] = np.nan
return x_new, y_new
stepSize = 0.2
##Blocks that should be disabled
allBlocks = [[(139.6, 93.6), (143.6, 93.6), (143.6, 97.6), (139.6, 97.6)],
[(154.2, 93.4), (158.2, 93.4), (158.2, 97.4), (154.2, 97.4)],
[(139.2, 77.8), (143.2, 77.8), (143.2, 81.8), (139.2, 81.8)],
[(154.2, 77.8), (158.2, 77.8), (158.2, 81.8), (154.2, 81.8)],
[(139.79999999999998, 86.4),
(142.6, 86.4),
(142.6, 88.0),
(139.79999999999998, 88.0)],
[(154.79999999999998, 87.2),
(157.6, 87.2),
(157.6, 88.8),
(154.79999999999998, 88.8)]]
x = np.arange(136.0, 161.0, stepSize)
y = np.arange(75.0, 101.0, stepSize)
xv, yv = np.meshgrid(x, y)
xv, yv = cutblocks(xv,yv,allBlocks)
MazeSize = xv.shape[0]*xv.shape[1]
adj = np.matlib.zeros((MazeSize,MazeSize)) #initialize AdjacencyMatrix
#make 1 whenever there is a connection between neighboring coordinates
mazeR, mazeC = 0,0
for row in range(xv.shape[0]):
for col in range(xv.shape[1]):
if xv[row][col]>0 and col+1<xv.shape[1] and round(np.abs(xv[row][col] - xv[row][col+1]),2) == stepSize:
adj[mazeR,mazeC+1] = 1
break
mazeC = mazeC+1
mazeR = mazeR+1
This code generates a mesh-grid in which some of the points are disabled because they are walls in the maze. The cost for every step (between connected vertices) is 1. My questions are:
1) The adjacency Matrix would be N.N and N=x.y (. is multiply). is that correct?
2) What could be the efficient way of finding and assigning the neighbors to values 1 in the adjacency matrix. ( I tried it but it doesn't work correctly)
3) Should I use graphs for this kind of problems ? My final goal is find the shortest path between the 2 coordinates (vertices).
Thanks
I'm trying to vectorize a code with numpy, to run it using multiprocessing, but i can't understand how numpy.apply_along_axis works. This is an example of the code, vectorized using map
import numpy
from scipy import sparse
import multiprocessing
from matplotlib import pyplot
#first i build a matrix of some x positions vs time datas in a sparse format
matrix = numpy.random.randint(2, size = 100).astype(float).reshape(10,10)
x = numpy.nonzero(matrix)[0]
times = numpy.nonzero(matrix)[1]
weights = numpy.random.rand(x.size)
#then i define an array of y positions
nStepsY = 5
y = numpy.arange(1,nStepsY+1)
#now i build an image using x-y-times coordinates and x-times weights
def mapIt(ithStep):
ncolumns = 80
image = numpy.zeros(ncolumns)
yTimed = y[ithStep]*times
positions = (numpy.round(x-yTimed)+50).astype(int)
values = numpy.bincount(positions,weights)
values = values[numpy.nonzero(values)]
positions = numpy.unique(positions)
image[positions] = values
return image
image = list(map(mapIt, range(nStepsY)))
image = numpy.array(image)
a = pyplot.imshow(image, aspect = 10)
Here the output plot
I tried to use numpy.apply_along_axis, but this function allows me to iterate only along the rows of image, while i need to iterate along the ithStep index too. E.g.:
#now i build an image using x-y-times coordinates and x-times weights
nrows = nStepsY
ncolumns = 80
matrix = numpy.zeros(nrows*ncolumns).reshape(nrows,ncolumns)
def applyIt(image):
image = numpy.zeros(ncolumns)
yTimed = y[ithStep]*times
positions = (numpy.round(x-yTimed)+50).astype(int)
values = numpy.bincount(positions,weights)
values = values[numpy.nonzero(values)]
positions = numpy.unique(positions)
image[positions] = values
return image
imageApplied = numpy.apply_along_axis(applyIt,1,matrix)
a = pyplot.imshow(imageApplied, aspect = 10)
It obviously return only the firs row nrows times, since nothing iterates ithStep:
And here the wrong plot
There is a way to iterate an index, or to use an index while numpy.apply_along_axis iterates?
Here the code with only matricial operations: it's quite faster than map or apply_along_axis but uses so much memory.
(in this function i use a trick with scipy.sparse, which works more intuitively than numpy arrays when you try to sum numbers on a same element)
def fullmatrix(nRows, nColumns):
y = numpy.arange(1,nStepsY+1)
image = numpy.zeros((nRows, nColumns))
yTimed = numpy.outer(y,times)
x3d = numpy.outer(numpy.ones(nStepsY),x)
weights3d = numpy.outer(numpy.ones(nStepsY),weights)
y3d = numpy.outer(y,numpy.ones(x.size))
positions = (numpy.round(x3d-yTimed)+50).astype(int)
matrix = sparse.coo_matrix((numpy.ravel(weights3d), (numpy.ravel(y3d), numpy.ravel(positions)))).todense()
return matrix
image = fullmatrix(nStepsY, 80)
a = pyplot.imshow(image, aspect = 10)
This way is simplier and very fast! Thank you so much.
nStepsY = 5
nRows = nStepsY
nColumns = 80
y = numpy.arange(1,nStepsY+1)
image = numpy.zeros((nRows, nColumns))
fakeRow = numpy.zeros(positions.size)
def itermatrix(ithStep):
yTimed = y[ithStep]*times
positions = (numpy.round(x-yTimed)+50).astype(int)
matrix = sparse.coo_matrix((weights, (fakeRow, positions))).todense()
matrix = numpy.ravel(matrix)
missColumns = (nColumns-matrix.size)
zeros = numpy.zeros(missColumns)
matrix = numpy.concatenate((matrix, zeros))
return matrix
for i in numpy.arange(nStepsY):
image[i] = itermatrix(i)
#or, without initialization of image:
imageMapped = list(map(itermatrix, range(nStepsY)))
imageMapped = numpy.array(imageMapped)
It feels like attempting to use map or apply_along_axis is obscuring the essentially iteration of the problem.
I rewrote your code as an explicit loop on y:
nStepsY = 5
y = numpy.arange(1,nStepsY+1)
image = numpy.zeros((nStepsY, 80))
for i, yi in enumerate(y):
yTimed = yi*times
positions = (numpy.round(x-yTimed)+50).astype(int)
values = numpy.bincount(positions,weights)
values = values[numpy.nonzero(values)]
positions = numpy.unique(positions)
image[i, positions] = values
a = pyplot.imshow(image, aspect = 10)
pyplot.show()
Looking at the code, I think I could calculate positions for all y values making a (y.shape[0],times.shape[0]) array. But the rest, the bincount and unique still have to work row by row.
apply_along_axis when working with a 2d array, and axis=1 essentially does:
res = np.zeros_like(arr)
for i in range....:
res[i,:] = func1d(arr[i,:])
If the input array has more dimensions it constructs a more elaborate indexing object [i,j,k,:]. And it can handle cases where func1d returns a different size array than the input. But in any case it is just a generalized iteration tool.
Moving the initial positions creation outside the loop:
yTimed = y[:,None]*times
positions = (numpy.round(x-yTimed)+50).astype(int)
image = numpy.zeros((positions.shape[0], 80))
for i, pos in enumerate(positions):
values = numpy.bincount(pos,weights)
values = values[numpy.nonzero(values)]
pos = numpy.unique(pos)
image[i, pos] = values
Now I can cast this as an apply_along_axis problem, with an applyIt that takes a positions vector (with all the yTimed information) rather than blank image vector.
def applyIt(pos, size, weights):
acolumn = numpy.zeros(size)
values = numpy.bincount(pos,weights)
values = values[numpy.nonzero(values)]
pos = numpy.unique(pos)
acolumn[pos] = values
return acolumn
image = numpy.apply_along_axis(applyIt, 1, positions, 80, weights)
Timing wise I expect it's a bit slower than my explicit iteration. It has to do more setup work, including a test call applyIt(positions[0,:],...) to determine the size of its return array (i.e image has different shape than positions.)
def csrmatrix(y, times, x, weights):
yTimed = numpy.outer(y,times)
n=y.shape[0]
x3d = numpy.outer(numpy.ones(n),x)
weights3d = numpy.outer(numpy.ones(n),weights)
y3d = numpy.outer(y,numpy.ones(x.size))
positions = (numpy.round(x3d-yTimed)+50).astype(int)
#print(y.shape, weights3d.shape, y3d.shape, positions.shape)
matrix = sparse.csr_matrix((numpy.ravel(weights3d), (numpy.ravel(y3d), numpy.ravel(positions))))
#print(repr(matrix))
return matrix
# one call
image = csrmatrix(y, times, x, weights)
# iterative call
alist = []
for yi in numpy.arange(1,nStepsY+1):
alist.append(csrmatrix(numpy.array([yi]), times, x, weights))
def mystack(alist):
# concatenate without offset
row, col, data = [],[],[]
for A in alist:
A = A.tocoo()
row.extend(A.row)
col.extend(A.col)
data.extend(A.data)
print(len(row),len(col),len(data))
return sparse.csr_matrix((data, (row, col)))
vimage = mystack(alist)