Find number of connected components of a k-nearest neighbours graph?

Find number of connected components of a k-nearest neighbours graph? - python

Is there an elegant way finding the number of connected components using a pre-calculated KDTree? Right now find the connected components using a breath-first search algorithm with the adjacency matrix given by the KDTree of k-nearest neighbours, but is there a better possibility?
import collections
import numpy as np
from sklearn import neighbors
N = 100
N_k = 8
ra = np.random.random
X0,X1 = ra(N),ra(N)
X0[0:N//2]+= 2
X1[0:N//2]+= 2
X = np.array([X0,X1]).T
tree = neighbors.KDTree(X)
dist, adj = tree.query(X, k = N_k+1)
dist = dist[:,1::]
adj = adj[:,1::]
print("Inside of find_components_lifo")
print("N = %d/ N_k = %d"%(N,N_k))
labels = np.zeros(N, dtype = np.int) - 1
n = 0
steps = 0
remains = (labels == -1)
while n < N:
i = np.arange(0,N,1)[remains][np.random.randint(0,N - n)]
# This is important for directed graphs
labels[i] = i
lifo = collections.deque([i])
while lifo:
ele = lifo.pop()
for k in adj[ele,:]:
if labels[k] == -1:
labels[k] = labels[i]
lifo.append(k)
elif labels[k] != labels[i]:
labels[labels == labels[i]] = labels[k]
remains = (labels == -1)
n = N - len(np.nonzero(remains)[0])
unique = np.unique(labels)
labels_ = np.zeros(N, dtype = np.int) - 1
for i, label in enumerate(unique):
choice = (labels == label)
N_cl = len(np.nonzero(choice)[0])
print("We found a cluster with N = %d"%N_cl)
labels_[choice] = i
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
fixticks(ax)
plt.show()
colors_ = np.array(colors)
for i in range(N):
for j in range(N_k):
ax.plot([X0[i],X0[adj[i,j]]],[X1[i],X1[adj[i,j]]], color = colors_[labels_[i]], alpha = 0.3)
ax.grid(True)
ax.scatter(X0,X1, s = 60, color = "Black")
plt.show()
I

I think you can use scipy's connected_components and scikit-learn's kneighbors_graph together. Does this produce what you're looking for?
from sklearn import neighbors
from scipy.sparse import csgraph
adj = neighbors.kneighbors_graph(X, N_k)
n_components, labels = csgraph.connected_components(adj)

Related

plot decision boundary in python

I did a logistic regression on my data and now I find the best Theta array to find the class of a new data.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def h_theta(x,theta):
return np.dot(x,np.transpose(theta))
def g_z(x,theta):
return 1/(1+pow(np.e,-h_theta(x,theta)))
def cost_function(x,y,theta):
cost = 0
for i in range(len(y)):
l = np.log(g_z(x[i],theta))
cost += -y[i]*l -(1-y[i])*np.log((1-(g_z(x[i],theta))))
return cost/(2*len(y))
def updata_theta(x,y,theta,alpha):
for i in range(6):
u = 0
for j in range(len(y)):
u += (h_theta(x[j],theta)-y[j])*x[j,i]
theta[0,i] -= alpha*u/(len(y))
data = pd.read_csv("D:\REZA\programming\machine learning-andrew ng\coding\machine-learning-ex2\ex2\ex2data2.csv")
y = np.array(data["1"])
s = np.array(data.drop("1",axis=1))
x1T2 = np.zeros((117,1))
x2T2 = np.zeros((117,1))
x1x2 = np.zeros((117,1))
one = np.ones((117,1))
m = len(y)
for i in range(m):
x1T2[i] = s[i,0]*s[i,0]
x2T2[i] = s[i,1]*s[i,1]
x1x2[i] = s[i,0]*s[i,1]
x = np.append(one,s,axis=1)
f = np.append(x1T2,x2T2,axis=1)
f = np.append(f,x1x2,axis=1)
x = np.append(x,f,axis=1)
x = np.array(x,dtype=np.float)
theta = np.zeros((1,6),dtype=float)
n=0
alpha = 0.003
while(n<100 and cost_function(x,y,theta)>0.01):
updata_theta(x,y,theta,alpha)
n+=1
I can plot my data with plt.scatter
plt.scatter(x[:,1],x[:,2],c=y)
plt.show()
scatter plot output
Now I want to plot decision boundary using this theta array, but I don't know how to do it.

Polygon from set of points that lie on a boundary

I have following set of points that lie on a boundary and want to create the polygon that connects these points. For a person it is quite obvious what path to follow, but I am unable to find an algorithm that does the same and trying to solve it myself it all seems quite tricky and ambiguous occasionally. What is the best solution for this?
As a background.
This is the boundary for the julia set with constant = -0.624+0.435j with stable area defined after 100 iterations. I got these points by setting the stable points to 1 and all other to zero and then convolving with a 3x3 matrix [[1, 1, 1], [1, 1, 1], [1, 1, 1]] and select the points that have value 1. My experimenting code is as follows:
import numpy as np
from scipy.signal import convolve2d
import matplotlib.pyplot as plt
r_min, r_max = -1.5, 1.5
c_min, c_max = -2.0, 2.0
dpu = 50 # dots per unit - 50 dots per 1 units means 200 points per 4 units
max_iterations = 100
cmap='hot'
intval = 1 / dpu
r_range = np.arange(r_min, r_max + intval, intval)
c_range = np.arange(c_min, c_max + intval, intval)
constant = -0.624+0.435j
def z_func(point, constant):
z = point
stable = True
num_iterations = 1
while stable and num_iterations < max_iterations:
z = z**2 + constant
if abs(z) > max(abs(constant), 2):
stable = False
return (stable, num_iterations)
num_iterations += 1
return (stable, 0)
points = np.array([])
colors = np.array([])
stables = np.array([], dtype='bool')
progress = 0
for imag in c_range:
for real in r_range:
point = complex(real, imag)
points = np.append(points, point)
stable, color = z_func(point, constant)
stables = np.append(stables, stable)
colors = np.append(colors, color)
print(f'{100*progress/len(c_range)/len(r_range):3.2f}% completed\r', end='')
progress += len(r_range)
print(' \r', end='')
rows = len(r_range)
start = len(colors)
orig_field = []
for i_num in range(len(c_range)):
start -= rows
real_vals = [color for color in colors[start:start+rows]]
orig_field.append(real_vals)
orig_field = np.array(orig_field, dtype='int')
rows = len(r_range)
start = len(stables)
stable_field = []
for i_num in range(len(c_range)):
start -= rows
real_vals = [1 if val == True else 0 for val in stables[start:start+rows]]
stable_field.append(real_vals)
stable_field = np.array(stable_field, dtype='int')
kernel = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
stable_boundary = convolve2d(stable_field, kernel, mode='same')
boundary_points = []
cols, rows = stable_boundary.shape
assert cols == len(c_range), "check c_range and cols"
assert rows == len(r_range), "check r_range and rows"
zero_field = np.zeros((cols, rows))
for col in range(cols):
for row in range(rows):
if stable_boundary[col, row] in [1]:
real_val = r_range[row]
# invert cols as min imag value is highest col and vice versa
imag_val = c_range[cols-1 - col]
stable_boundary[col, row] = 1
boundary_points.append((real_val, imag_val))
else:
stable_boundary[col, row] = 0
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(5, 5))
ax1.matshow(orig_field, cmap=cmap)
ax2.matshow(stable_field, cmap=cmap)
ax3.matshow(stable_boundary, cmap=cmap)
x = [point[0] for point in boundary_points]
y = [point[1] for point in boundary_points]
ax4.plot(x, y, 'o', c='r', markersize=0.5)
ax4.set_aspect(1)
plt.show()
Output with dpu = 200 and max_iterations = 100:
inspired by this Youtube video: What's so special about the Mandelbrot Set? - Numberphile

Thanks for the input. As it turned out this is indeed not as easy as it seems. In the end I have used the convex_hull and the alpha shape algorithms to deterimine boundary polygon(s) around the boundary points as shown the picture below. Top left is the juliaset where colors represent the number of iterations; top right black is unstable and white is stable; bottom left is a collection of points representing the boundary between unstable and stable; and bottom right is the collection of boundary polygons around the boundary points.
The code shows below:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib import patches as mpl_patches
from matplotlib.collections import PatchCollection
import shapely.geometry as geometry
from shapely.ops import cascaded_union, polygonize
from scipy.signal import convolve2d
from scipy.spatial import Delaunay # pylint: disable-msg=no-name-in-module
from descartes.patch import PolygonPatch
def juliaset_func(point, constant, max_iterations):
z = point
stable = True
num_iterations = 1
while stable and num_iterations < max_iterations:
z = z**2 + constant
if abs(z) > max(abs(constant), 2):
stable = False
return (stable, num_iterations)
num_iterations += 1
return (stable, num_iterations)
def create_juliaset(r_range, c_range, constant, max_iterations):
''' create a juliaset that returns two fields (matrices) - orig_field and
stable_field, where orig_field contains the number of iterations for
a point in the complex plane (r, c) and stable_field for each point
either whether the point is stable (True) or not stable (False)
'''
points = np.array([])
colors = np.array([])
stables = np.array([], dtype='bool')
progress = 0
for imag in c_range:
for real in r_range:
point = complex(real, imag)
points = np.append(points, point)
stable, color = juliaset_func(point, constant, max_iterations)
stables = np.append(stables, stable)
colors = np.append(colors, color)
print(f'{100*progress/len(c_range)/len(r_range):3.2f}% completed\r', end='')
progress += len(r_range)
print(' \r', end='')
rows = len(r_range)
start = len(colors)
orig_field = []
stable_field = []
for i_num in range(len(c_range)):
start -= rows
real_colors = [color for color in colors[start:start+rows]]
real_stables = [1 if val == True else 0 for val in stables[start:start+rows]]
orig_field.append(real_colors)
stable_field.append(real_stables)
orig_field = np.array(orig_field, dtype='int')
stable_field = np.array(stable_field, dtype='int')
return orig_field, stable_field
def find_boundary_points_of_stable_field(stable_field, r_range, c_range):
''' find the boundary points by convolving the stable_field with a 3x3
kernel of all ones and define the point on the boundary where the
convolution is 1.
'''
kernel = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]], dtype='int8')
stable_boundary = convolve2d(stable_field, kernel, mode='same')
rows = len(r_range)
cols = len(c_range)
boundary_points = []
for col in range(cols):
for row in range(rows):
# Note you can make the boundary 'thicker ' by
# expanding the range of possible values like [1, 2, 3]
if stable_boundary[col, row] in [1]:
real_val = r_range[row]
# invert cols as min imag value is highest col and vice versa
imag_val = c_range[cols-1 - col]
boundary_points.append((real_val, imag_val))
else:
pass
return [geometry.Point(val[0], val[1]) for val in boundary_points]
def alpha_shape(points, alpha):
''' determine the boundary of a cluster of points whereby 'sharpness' of
the boundary depends on alpha.
paramaters:
:points: list of shapely Point objects
:alpha: scalar
returns:
shapely Polygon object or MultiPolygon
edge_points: list of start and end point of each side of the polygons
'''
if len(points) < 4:
# When you have a triangle, there is no sense
# in computing an alpha shape.
return geometry.MultiPoint(list(points)).convex_hull
def add_edge(edges, edge_points, coords, i, j):
"""
Add a line between the i-th and j-th points,
if not in the list already
"""
if (i, j) in edges or (j, i) in edges:
# already added
return
edges.add((i, j))
edge_points.append((coords[[i, j]]))
coords = np.array([point.coords[0]
for point in points])
tri = Delaunay(coords)
edges = set()
edge_points = []
# loop over triangles:
# ia, ib, ic = indices of corner points of the
# triangle
for ia, ib, ic in tri.vertices:
pa = coords[ia]
pb = coords[ib]
pc = coords[ic]
# Lengths of sides of triangle
a = np.sqrt((pa[0]-pb[0])**2 + (pa[1]-pb[1])**2)
b = np.sqrt((pb[0]-pc[0])**2 + (pb[1]-pc[1])**2)
c = np.sqrt((pc[0]-pa[0])**2 + (pc[1]-pa[1])**2)
# Semiperimeter of triangle
s = (a + b + c)/2.0
# Area of triangle by Heron's formula
area = np.sqrt(s*(s-a)*(s-b)*(s-c))
circum_r = a*b*c/(4.0*area)
# Here's the radius filter.
if circum_r < alpha:
add_edge(edges, edge_points, coords, ia, ib)
add_edge(edges, edge_points, coords, ib, ic)
add_edge(edges, edge_points, coords, ic, ia)
m = geometry.MultiLineString(edge_points)
triangles = list(polygonize(m))
return cascaded_union(triangles), edge_points
def main():
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(5, 5))
# define limits, range and resolution in the complex plane
r_min, r_max = -1.5, 1.5
c_min, c_max = -1.1, 1.1
dpu = 100 # dots per unit - 50 dots per 1 units means 200 points per 4 units
intval = 1 / dpu
r_range = np.arange(r_min, r_max + intval, intval)
c_range = np.arange(c_min, c_max + intval, intval)
# create two matrixes (orig_field and stable_field) for the juliaset with
# constant
constant = -0.76 -0.10j
max_iterations = 50
orig_field, stable_field = create_juliaset(r_range, c_range,
constant,
max_iterations)
cmap='nipy_spectral'
ax1.matshow(orig_field, cmap=cmap, interpolation='bilinear')
ax2.matshow(stable_field, cmap=cmap)
# find points that are on the boundary of the stable field
boundary_points = find_boundary_points_of_stable_field(stable_field,
r_range, c_range)
x = [p.x for p in boundary_points]
y = [p.y for p in boundary_points]
ax3.plot(x, y, 'o', c='r', markersize=0.5)
ax3.set_xlim(r_min, r_max)
ax3.set_ylim(c_min, c_max)
ax3.set_aspect(1)
# find the boundary polygon using alpha_shape where 'sharpness' of the
# boundary is determined by the factor ALPHA
# a green boundary consists of multiple polygons, a red boundary on a single
# polygon
alpha = 0.03 # determines shape of the boundary polygon
bnd_polygon, _ = alpha_shape(boundary_points, alpha)
patches = []
if bnd_polygon.geom_type == 'Polygon':
patches.append(PolygonPatch(bnd_polygon))
ec = 'red'
else:
for poly in bnd_polygon:
patches.append(PolygonPatch(poly))
ec = 'green'
p = PatchCollection(patches, facecolor='none', edgecolor=ec, lw=1)
ax4.add_collection(p)
ax4.set_xlim(r_min, r_max)
ax4.set_ylim(c_min, c_max)
ax4.set_aspect(1)
plt.show()
if __name__ == "__main__":
main()

Mapping surface curvature to face?

I am trying to map surface curvature (mean, gaussian and principle curvature) values to surface faces. I have computed the curvature values for an artificially generated 3D surface (eg. cylinder). The resulting 3D surface that I am trying to get is something like this mean curvature mapped to surface. Can somebody guide me in how to get this?
The code for the surface I am creating is:
import math
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
xindex = []
yindex = []
zindex = []
x = []
y = []
z = []
count = 1
surfaceSt = []
import numpy
numpy.set_printoptions(threshold=numpy.nan)
#surfaceStX = numpy.empty((10,36))
#surfaceStY = numpy.empty((10,36))
#surfaceStZ = numpy.empty((10,36))
surfaceStZ = []
surfaceStX = []
surfaceStY = []
for i in range(1,21):
if i < 11:
x = []
y = []
z = []
pt = []
ptX = []
ptY = []
ptZ = []
for t in range(0,360,10):
x = i*math.sin(math.radians(t))
y = i*math.cos(math.radians(t))
z = i-1
ptX.append(x)
ptY.append(y)
ptZ.append(z)
pt.append([x,y,z])
ptX.append(ptX[0])
ptY.append(ptY[0])
ptZ.append(ptZ[0])
surfaceStX.append(ptX)
surfaceStY.append(ptY)
surfaceStZ.append(ptZ)
# numpy.append(surfaceStX,ptX)
# numpy.append(surfaceStY,ptY)
# numpy.append(surfaceStZ,ptZ)
#ax.scatter(x,y,z)
elif i >= 11:
x = []
y = []
z = []
pt = []
ptX = []
ptY = []
ptZ = []
for t in range(0,360,10):
x = (i-count)*math.sin(math.radians(t))
y = (i-count)*math.cos(math.radians(t))
z = i-1
ptX.append(x)
ptY.append(y)
ptZ.append(z)
pt.append([x,y,z])
ptX.append(ptX[0])
ptY.append(ptY[0])
ptZ.append(ptZ[0])
surfaceStX.append(ptX)
surfaceStY.append(ptY)
surfaceStZ.append(ptZ)
count +=2
X = numpy.array(surfaceStX)
Y = numpy.array(surfaceStY)
Z = numpy.array(surfaceStZ)
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1,shade = 'True' )
from surfaceCurvature import surface_curvature
Pcurvature,Gcurvature,Mcurvature = surface_curvature(X,Y,Z)
plt.show()
My surface curvature computation is given below (courtesy: https://github.com/sujithTSR/surface-curvature):
def surface_curvature(X,Y,Z):
(lr,lb)=X.shape
#print lr
#print "awfshss-------------"
#print lb
#First Derivatives
Xv,Xu=np.gradient(X)
Yv,Yu=np.gradient(Y)
Zv,Zu=np.gradient(Z)
#Second Derivatives
Xuv,Xuu=np.gradient(Xu)
Yuv,Yuu=np.gradient(Yu)
Zuv,Zuu=np.gradient(Zu)
Xvv,Xuv=np.gradient(Xv)
Yvv,Yuv=np.gradient(Yv)
Zvv,Zuv=np.gradient(Zv)
#2D to 1D conversion
#Reshape to 1D vectors
Xu=np.reshape(Xu,lr*lb)
Yu=np.reshape(Yu,lr*lb)
Zu=np.reshape(Zu,lr*lb)
Xv=np.reshape(Xv,lr*lb)
Yv=np.reshape(Yv,lr*lb)
Zv=np.reshape(Zv,lr*lb)
Xuu=np.reshape(Xuu,lr*lb)
Yuu=np.reshape(Yuu,lr*lb)
Zuu=np.reshape(Zuu,lr*lb)
Xuv=np.reshape(Xuv,lr*lb)
Yuv=np.reshape(Yuv,lr*lb)
Zuv=np.reshape(Zuv,lr*lb)
Xvv=np.reshape(Xvv,lr*lb)
Yvv=np.reshape(Yvv,lr*lb)
Zvv=np.reshape(Zvv,lr*lb)
Xu=np.c_[Xu, Yu, Zu]
Xv=np.c_[Xv, Yv, Zv]
Xuu=np.c_[Xuu, Yuu, Zuu]
Xuv=np.c_[Xuv, Yuv, Zuv]
Xvv=np.c_[Xvv, Yvv, Zvv]
# First fundamental Coeffecients of the surface (E,F,G)
E=np.einsum('ij,ij->i', Xu, Xu)
F=np.einsum('ij,ij->i', Xu, Xv)
G=np.einsum('ij,ij->i', Xv, Xv)
m=np.cross(Xu,Xv,axisa=1, axisb=1)
p=np.sqrt(np.einsum('ij,ij->i', m, m))
n=m/np.c_[p,p,p]
# Second fundamental Coeffecients of the surface (L,M,N), (e,f,g)
L= np.einsum('ij,ij->i', Xuu, n) #e
M= np.einsum('ij,ij->i', Xuv, n) #f
N= np.einsum('ij,ij->i', Xvv, n) #g
# Gaussian Curvature
K=(L*N-M**2)/(E*G-F**2)
K=np.reshape(K,lr*lb)
# Mean Curvature
H = (E*N + G*L - 2*F*M)/((E*G - F**2))
H = np.reshape(H,lr*lb)
# Principle Curvatures
Pmax = H + np.sqrt(H**2 - K)
Pmin = H - np.sqrt(H**2 - K)
#[Pmax, Pmin]
Principle = [Pmax,Pmin]
return Principle,K,H
EDIT 1:
I tried a few things based on the link provided by armatita. Following is my code:
'''
Creat half cylinder
'''
import numpy
import matplotlib.pyplot as plt
import math
ptX= []
ptY = []
ptZ = []
ptX1 = []
ptY1 = []
ptZ1 = []
for i in range(0,10):
x = []
y = []
z = []
for t in range(0,200,20):
x.append(10*math.cos(math.radians(t)))
y.append(10*math.sin(math.radians(t)))
z.append(i)
x1= 5*math.cos(math.radians(t))
y1 = 5*math.sin(math.radians(t))
z1 = i
ptX1.append(x1)
ptY1.append(y1)
ptZ1.append(z1)
ptX.append(x)
ptY.append(y)
ptZ.append(z)
X = numpy.array(ptX)
Y = numpy.array(ptY)
Z = numpy.array(ptZ)
fig = plt.figure()
ax = fig.add_subplot(111,projection = '3d')
from surfaceCurvature import surface_curvature
p,g,m= surface_curvature(X,Y,Z)
n = numpy.reshape(m,numpy.shape(X))
ax.plot_surface(X,Y,Z, rstride=1, cstride=1)
plt.show()
'''
Map mean curvature to color
'''
import numpy as np
X1 = X.ravel()
Y1 = Y.ravel()
Z1 = Z.ravel()
from scipy.interpolate import RectBivariateSpline
# Define the points at the centers of the faces:
y_coords, x_coords = np.unique(Y1), np.unique(X1)
y_centers, x_centers = [ arr[:-1] + np.diff(arr)/2 for arr in (y_coords, x_coords)]
# Convert back to a 2D grid, required for plot_surface:
#Y1 = Y.reshape(y_coords.size, -1)
#X1 = X.reshape(-1, x_coords.size)
#Z1 = Z.reshape(X.shape)
C = m.reshape(X.shape)
C -= C.min()
C /= C.max()
interp_func = RectBivariateSpline(x_coords, y_coords, C.T, kx=1, ky=1)
I get the following error:
raise TypeError('y dimension of z must have same number of y')
TypeError: y dimension of z must have same number of elements as y
All the dimensions are same. Can anybody tell what's going wrong with my implementation?

I think you need to figure out exactly what you need. Looking at your code I notice you are producing variables that have no use. Also you seem to have a function to calculate the surface curvature but than you try to make some calculations using the np.unique function for which I cannot see the purpose here (and that is why that error appears).
So let's assume this:
You have a function that returns the curvature value for each cell.
You have the X,Y and Z meshes to plot that surface.
Using your code, and assuming you m variable is the curvature (again this is in your code), if I do this:
import numpy
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import math
# Here would be the surface_curvature function
X = numpy.array(ptX)
Y = numpy.array(ptY)
Z = numpy.array(ptZ)
p,g,m= surface_curvature(X,Y,Z)
C = m.reshape(X.shape)
C -= C.min()
C /= C.max()
fig = plt.figure()
ax = fig.add_subplot(111,projection = '3d')
n = numpy.reshape(m,numpy.shape(X))
ax.plot_surface(X,Y,Z,facecolors = cm.jet(C), rstride=1, cstride=1)
plt.show()
, I obtain this:
Which is a value mapped to color in a matplotlib surface. If that C you've built is not the actual curvature you need to replace it by the one that is.

Minimum spanning tree : distance and graph

i'm stuck in a minimum spanning tree problem.
I've found a code that resolve the problem and return a graph of the solution, I've edited it a bit but I don't find a way to return the distance along to the graphic, as I don't have much python experience.
Can you help me ? Thanks
import numpy as np
from scipy.spatial.distance import pdist, squareform
import matplotlib.pyplot as plt
def minimum_spanning_tree(X, copy_X=True):
if copy_X:
X = X.copy()
if X.shape[0] != X.shape[1]:
raise ValueError("X needs to be square matrix of edge weights")
n_vertices = X.shape[0]
spanning_edges = []
visited_vertices = [0]
num_visited = 1
diag_indices = np.arange(n_vertices)
X[diag_indices, diag_indices] = np.inf
while num_visited != n_vertices:
new_edge = np.argmin(X[visited_vertices], axis=None)
new_edge = divmod(new_edge, n_vertices)
new_edge = [visited_vertices[new_edge[0]], new_edge[1]]
spanning_edges.append(new_edge)
visited_vertices.append(new_edge[1])
X[visited_vertices, new_edge[1]] = np.inf
X[new_edge[1], visited_vertices] = np.inf
num_visited += 1
return np.vstack(spanning_edges)
def test_mst():
n = int(input())
P = np.array([], int)
for i in range(0,n):
y = list(map(int,input().split()))
P = np.append(P,[y[0],y[1]], axis=0)
P = P.reshape(n,2)
X = squareform(pdist(P))
edge_list = minimum_spanning_tree(X)
plt.scatter(P[:, 0], P[:, 1])
for edge in edge_list:
i, j = edge
plt.plot([P[i, 0], P[j, 0]], [P[i, 1], P[j, 1]], c='r')
plt.show()
if __name__ == "__main__":
test_mst()

numpy to generate discrete probability distribution

I'm following a code example I found at http://docs.scipy.org/doc/scipy/reference/tutorial/stats.html#subclassing-rv-discrete for implementing a random number generator for discrete values of a normal distribution. The exact example (not surprisingly) works quite well, but if I modify it to allow only left or right-tailed results, the distribution around 0 should is too low (bin zero should contain more values). I must have hit a boundary condition, but am unable to work it out. Am I missing something?
This is the result of counting the random numbers per bin:
np.bincount(rvs) [1082 2069 1833 1533 1199 837 644 376 218 111 55 20 12 7 2 2]
This is the histogram:
from scipy import stats
np.random.seed(42)
def draw_discrete_gaussian(rng, tail='both'):
# number of integer support points of the distribution minus 1
npoints = rng if tail == 'both' else rng * 2
npointsh = npoints / 2
npointsf = float(npoints)
# bounds for the truncated normal
nbound = 4
# actual bounds of truncated normal
normbound = (1+1/npointsf) * nbound
# integer grid
grid = np.arange(-npointsh, npointsh+2, 1)
# bin limits for the truncnorm
gridlimitsnorm = (grid-0.5) / npointsh * nbound
# used later in the analysis
gridlimits = grid - 0.5
grid = grid[:-1]
probs = np.diff(stats.truncnorm.cdf(gridlimitsnorm, -normbound, normbound))
gridint = grid
normdiscrete = stats.rv_discrete(values=(gridint, np.round(probs, decimals=7)), name='normdiscrete')
# print 'mean = %6.4f, variance = %6.4f, skew = %6.4f, kurtosis = %6.4f'% normdiscrete.stats(moments = 'mvsk')
rnd_val = normdiscrete.rvs()
if tail == 'both':
return rnd_val
if tail == 'left':
return -abs(rnd_val)
elif tail == 'right':
return abs(rnd_val)
rng = 15
tail = 'right'
rvs = [draw_discrete_gaussian(rng, tail=tail) for i in xrange(10000)]
if tail == 'both':
rng_min = rng / -2.0
rng_max = rng / 2.0
elif tail == 'left':
rng_min = -rng
rng_max = 0
elif tail == 'right':
rng_min = 0
rng_max = rng
gridlimits = np.arange(rng_min-.5, rng_max+1.5, 1)
print gridlimits
f, l = np.histogram(rvs, bins=gridlimits)
# cheap way of creating histogram
import matplotlib.pyplot as plt
%matplotlib inline
bins, edges = f, l
left,right = edges[:-1],edges[1:]
X = np.array([left, right]).T.flatten()
Y = np.array([bins, bins]).T.flatten()
# print 'rvs', rvs
print 'np.bincount(rvs)', np.bincount(rvs)
plt.plot(X,Y)
plt.show()

I try to answer my own question based on comments from #user333700 and #user235711:
I insert into the method before normdiscrete = ...
if tail == 'right':
gridint = gridint[npointsh:]
probs = probs[npointsh:]
s = probs.sum()
probs = probs / s
elif tail == 'left':
gridint = gridint[0: npointsh]
probs = probs[0: npointsh]
s = probs.sum()
probs = probs / s
The resulting histograms and look much nicer:

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Find number of connected components of a k-nearest neighbours graph? - python

I think you can use scipy's connected_components and scikit-learn's kneighbors_graph together. Does this produce what you're looking for? from sklearn import neighbors from scipy.sparse import csgraph adj = neighbors.kneighbors_graph(X, N_k) n_components, labels = csgraph.connected_components(adj)

Related

plot decision boundary in python

Polygon from set of points that lie on a boundary

Mapping surface curvature to face?

Minimum spanning tree : distance and graph

numpy to generate discrete probability distribution

Categories

Resources