I have a problem understanding the A* Algorithm (Python) - python

I'm trying to look into the A* Algorithm but I'm kind of having a hard time understanding a specific part. So the A* Algorithm Python Code with the example is this:
class Node():
"""A node class for A* Pathfinding"""
def __init__(self, parent=None, position=None):
self.parent = parent
self.position = position
self.g = 0
self.h = 0
self.f = 0
def __eq__(self, other):
return self.position == other.position
def astar(maze, start, end):
"""Returns a list of tuples as a path from the given start to the given end in the given maze"""
# Create start and end node
start_node = Node(None, start)
start_node.g = start_node.h = start_node.f = 0
end_node = Node(None, end)
end_node.g = end_node.h = end_node.f = 0
# Initialize both open and closed list
open_list = []
closed_list = []
# Add the start node
open_list.append(start_node)
# Loop until you find the end
while len(open_list) > 0:
# Get the current node
current_node = open_list[0]
current_index = 0
for index, item in enumerate(open_list):
if item.f < current_node.f:
current_node = item
current_index = index
# Pop current off open list, add to closed list
open_list.pop(current_index)
closed_list.append(current_node)
# Found the goal
if current_node == end_node:
path = []
current = current_node
while current is not None:
path.append(current.position)
current = current.parent
return path[::-1] # Return reversed path
# Generate children
children = []
for new_position in [(0, -1), (0, 1), (-1, 0), (1, 0), (-1, -1), (-1, 1), (1, -1), (1, 1)]: # Adjacent squares
# Get node position
node_position = (current_node.position[0] + new_position[0], current_node.position[1] + new_position[1])
# Make sure within range
if node_position[0] > (len(maze) - 1) or node_position[0] < 0 or node_position[1] > (len(maze[len(maze)-1]) -1) or node_position[1] < 0:
continue
# Make sure walkable terrain
if maze[node_position[0]][node_position[1]] != 0:
continue
# Create new node
new_node = Node(current_node, node_position)
# Append
children.append(new_node)
# Loop through children
for child in children:
# Child is on the closed list
for closed_child in closed_list:
if child == closed_child:
continue
# Create the f, g, and h values
child.g = current_node.g + 1
child.h = ((child.position[0] - end_node.position[0]) ** 2) + ((child.position[1] - end_node.position[1]) ** 2)
child.f = child.g + child.h
# Child is already in the open list
for open_node in open_list:
if child == open_node and child.g > open_node.g:
continue
# Add the child to the open list
open_list.append(child)
def main():
maze = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]]
start = (4, 3)
end = (4, 5)
path = astar(maze, start, end)
print(path)
if __name__ == '__main__':
main()
In the
for index, item in enumerate(open_list):
if item.f < current_node.f:
current_node = item
current_index = index
I don't get how the current_node can be defined as the item in the maze I've given above. In the example I've given above, the start = (4,3) and end = (4,5), giving the only possible shortest distance would be as something like the following:
maze = [[0, 0, 0, 0, *, 0, 0, 0, 0, 0],
[0, 0, 0, *, 1, *, 0, 0, 0, 0],
[0, 0, 0, *, 1, *, 0, 0, 0, 0],
[0, 0, 0, *, 1, *, 0, 0, 0, 0],
[0, 0, 0, s, 1, e, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]]
with the s being the start_node and e being the end_node.
However, in the code of the A* Algorithm, the current_node becomes the item only if the item.f is smaller than the current_node.f. In the example I've given here, I can't see that the first * would have an f value smaller than the f value of the start_node - I mean, in the code, we already have described the start_node.f = 0 haven't we? And we defined the first current_node as the start_node... so no item in the open_list would have an item.f value smaller than zero..
How is this possible?? Or am I missing something here??

I think the clue is that you have to take into account the two lines above this for loop as well:
# Get the current node
current_node = open_list[0]
current_index = 0
for index, item in enumerate(open_list):
if item.f < current_node.f:
current_node = item
current_index = index
What happens:
In the first iteration of your while loop:
There is only one item in the open_list, being the start_node where indeed f=0
So after the above code block, this start node becomes the current_node
Right after the above loop the start_node is removed from the open_list: open_list.pop(current_index)
The open_list is then populated by the valid neighbouring locations (by walking its children)
In the second iteration of your while loop:
The above code block looks for the item in the open_list with the lowest f value
because of the first line current_node = open_list[0], you will be sure that the new current_node is always one from the open_list.
as the start_node has been removed from the open_list, it will for sure be replaced here

Related

Python Matrix check for diagonals and horizontals correctly to win a game

I'm building a game like Connect4 the user can connect any number. I am having a problem checking the horizontal and diagonals lines:
def WinningSequenceCheck(board,piece,row_count):
#Horizontal Check
piecePlayer1=0
piecePlayer2=0
win_sequence=board[0]["TamanhoSequĂȘncia"]
width=board[0]["CumpGrelha"]
height=row_count
for c in range(height+1):
for r in range(width):
if board[0]["Tabuleiro"][c][r]==1:
piecePlayer1+=1
if piecePlayer1==win_sequence:
return True
if board[0]["Tabuleiro"][c][r]==2:
pecasPlayer2+=1
if pecasPlayer2==win_sequence:
return True
#For diagonals i did this:
diag1=0
diag2=0
a=0
b=height
d=0
for c in range(width):
for l in range(height+1):
if board[0]["Tabuleiro"][b-l][l+a]==1:
diag1+=1
if diag1==win_sequence:
return True
if board[0]["Tabuleiro"][b-l][l+a]==2:
diag2+=1
if diag2==win_sequence:
return True
if (b-l<height):
b=height
if l<=l-1:
a=a
elif(a+l<width-1):
a+=1
elif(a+l<=width-1):
return
else:
break
The problem with diagonals is that it goes from left to right but it's not checking properly if there is a line of a winning sequence of 1s or 2s
If it finds it should return true because im incrementing each time he finds 1 or 2
Im i doing the horizontal check properly?
Is there a better way to check in diagonals ?
The expected result from the horizontal is that if it finds the number 1 it adds to the variable piecePlayer1 (for the player 1)
If the piecePlayer1 is equal to Victory sequence ( example victory sequence is 5) the game returns true because in one of the lines he found 5 one's .
Winning Sequence=5
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 1, 0, 0, 0, 0, 0] ---> Win
The same is for the diagonal check:
Winning Sequence =4
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0] Diagonal WIN
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
#Stunna, here is simple sample code to check the diagonal values, you can try it and modify to suit your main module:
def winner(board, length):
"""Returns the 'mark' of the winning player"""
W = range(len(board)) # width
H = range(len(board[0])) # height
directions = [(0, 1), (1, 0), (1, 1), (1, -1)]
for dx, dy in directions:
edges = []
if dx > 0:
edges += [(0, y) for y in H]
if dy > 0: # scanning down
edges += [(x, 0) for x in W]
if dy < 0: # scanning up
edges += [(x, H[-1]) for x in W]
for ex, ey in edges:
row = 0; mark = None
x, y = ex, ey
while x in W and y in H:
if board[x][y] == mark:
row += 1
else:
mark = board[x][y]
row = 1
if mark is not None and row >= length:
return mark
x, y = x + dx, y + dy
return None
print(winner([
['X', 'O', 'O', 'O'],
['O', 'X', 'O', 'X'],
['O', 'O', 'X', 'O'],
['O', 'X', 'X', 'X'] ], 4)) # X

How to write a A-star search algorithm to visit all the nodes without a specific end-point?

I am trying to write an algorithm to collect as many as possible items from different stops. which every stop is a cell in a matrices. the goal is to find the shortest path to collect all the items. The start point is defined, but the end-point can be anywhere... I used the A-star search algorithm for it...
Here is code:
import numpy as np
class Node:
"""
A node class for A* Pathfinding
parent is parent of the current Node
position is current position of the Node in the maze
g is cost from start to current Node
h is heuristic based estimated cost for current Node to end Node
f is total cost of present node i.e. : f = g + h
"""
def __init__(self, parent=None, position=None):
self.parent = parent
self.position = position
self.g = 0
self.h = 0
self.f = 0
def __eq__(self, other):
return self.position == other.position
#This function return the path of the search
def return_path(current_node,maze):
path = []
no_rows, no_columns = np.shape(maze)
# here we create the initialized result maze with -1 in every position
result = [[-1 for i in range(no_columns)] for j in range(no_rows)]
current = current_node
while current is not None:
path.append(current.position)
current = current.parent
# Return reversed path as we need to show from start to end path
path = path[::-1]
start_value = 0
# we update the path of start to end found by A-star serch with every step incremented by 1
for i in range(len(path)):
result[path[i][0]][path[i][1]] = start_value
start_value += 1
return result
def search(maze, cost, start, end):
"""
Returns a list of tuples as a path from the given start to the given end in the given maze
:param maze:
:param cost
:param start:
:param end:
:return:
"""
# Create start and end node with initized values for g, h and f
start_node = Node(None, tuple(start))
start_node.g = start_node.h = start_node.f = 0
end_node = Node(None, tuple(end))
end_node.g = end_node.h = end_node.f = 0
# Initialize both yet_to_visit and visited list
# in this list we will put all node that are yet_to_visit for exploration.
# From here we will find the lowest cost node to expand next
yet_to_visit_list = []
# in this list we will put all node those already explored so that we don't explore it again
visited_list = []
# Add the start node
yet_to_visit_list.append(start_node)
# Adding a stop condition. This is to avoid any infinite loop and stop
# execution after some reasonable number of steps
outer_iterations = 0
max_iterations = (len(maze) // 2) ** 10
# what squares do we search . serarch movement is left-right-top-bottom
#(4 movements) from every positon
move = [[-1, 0 ], # go up
[ 0, -1], # go left
[ 1, 0 ], # go down
[ 0, 1 ]] # go right
"""
1) We first get the current node by comparing all f cost and selecting the lowest cost node for further expansion
2) Check max iteration reached or not . Set a message and stop execution
3) Remove the selected node from yet_to_visit list and add this node to visited list
4) Perofmr Goal test and return the path else perform below steps
5) For selected node find out all children (use move to find children)
a) get the current postion for the selected node (this becomes parent node for the children)
b) check if a valid position exist (boundary will make few nodes invalid)
c) if any node is a wall then ignore that
d) add to valid children node list for the selected parent
For all the children node
a) if child in visited list then ignore it and try next node
b) calculate child node g, h and f values
c) if child in yet_to_visit list then ignore it
d) else move the child to yet_to_visit list
"""
#find maze has got how many rows and columns
no_rows, no_columns = np.shape(maze)
# Loop until you find the end
while len(yet_to_visit_list) > 0:
# Every time any node is referred from yet_to_visit list, counter of limit operation incremented
outer_iterations += 1
# Get the current node
current_node = yet_to_visit_list[0]
current_index = 0
for index, item in enumerate(yet_to_visit_list):
if item.f < current_node.f:
current_node = item
current_index = index
# if we hit this point return the path such as it may be no solution or
# computation cost is too high
if outer_iterations > max_iterations:
print ("giving up on pathfinding too many iterations")
return return_path(current_node,maze)
# Pop current node out off yet_to_visit list, add to visited list
yet_to_visit_list.pop(current_index)
visited_list.append(current_node)
# test if goal is reached or not, if yes then return the path
if current_node == end_node:
return return_path(current_node,maze)
# Generate children from all adjacent squares
children = []
for new_position in move:
# Get node position
node_position = (current_node.position[0] + new_position[0], current_node.position[1] + new_position[1])
# Make sure within range (check if within maze boundary)
if (node_position[0] > (no_rows - 1) or
node_position[0] < 0 or
node_position[1] > (no_columns -1) or
node_position[1] < 0):
continue
# Make sure walkable terrain
if maze[node_position[0]][node_position[1]] != 0:
continue
# Create new node
new_node = Node(current_node, node_position)
# Append
children.append(new_node)
# Loop through children
for child in children:
# Child is on the visited list (search entire visited list)
if len([visited_child for visited_child in visited_list if visited_child == child]) > 0:
continue
# Create the f, g, and h values
child.g = current_node.g + cost
## Heuristic costs calculated here, this is using eucledian distance
child.h = (((child.position[0] - end_node.position[0]) ** 2) +
((child.position[1] - end_node.position[1]) ** 2))
child.f = child.g + child.h
# Child is already in the yet_to_visit list and g cost is already lower
if len([i for i in yet_to_visit_list if child == i and child.g > i.g]) > 0:
continue
# Add the child to the yet_to_visit list
yet_to_visit_list.append(child)
if name == 'main':
maze = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0]]
start =[10, 6] # starting position
end = [14,20] # ending position
cost = 1 # cost per movement
path = search(maze,cost, start, end)
# print(path)
print('\n'.join([''.join(["{:" ">3d}".format(item) for item in row])
for row in path]))

List calculation with elements not equal to zero

Let's assume I have a list:
list1 = [16620, 22032, 0, 0, 0, 136813, 137899, 0, 199546, 204804]
I am looking for a new list that subtracts every 'non-zero' value from the following 'non-zero' value, e.g. 22032-16620, 137899-136813. 'Zero' values will stay untouched.
In addition to that, the subtracted 'non-zero' value should change to zero.
The output would look something like:
list2 = [0, 5412, 0, 0, 0, 0, 1086, 0, 0, 5258]
Please note that the numbers, the length of the list and its distribution of elements may vary, e.g. a list could also look like
list1 = [0, 0, 0, 0, 95472, 0, 0, 104538, 0, 0, 0, 0, 187649, 0, 0, 204841, 0, 0, 0, 0, 0, 0, 0, 0]
which should turn into:
list2 = [0, 0, 0, 0, 0, 0, 0, 9066, 0, 0, 0, 0, 0, 0, 0, 17192, 0, 0, 0, 0, 0, 0, 0, 0]
As you can see, the number of elements stays the same for list1 and list2. Also, there is always an even number of 'zero' values and an even number of 'non-zero' values.
Help is greatly appreciated!
What I have so far:
from itertools import cycle, chain
list1 = [11545, 15334, 71341, 73861, 0, 0, 170374, 171671]
newlist = [list1[i + 1] - list1[i] for i in range(len(list1)-1)]
list2 = list(chain.from_iterable(zip(newlist[0::2], cycle([int()]))))
print list2
The output print list2 looks like I imagine it to be, yet it won't work for a list that looks like:
list1 = [16620, 22032, 0, 0, 0, 136813, 137899, 0, 199546, 204804]
Copy over, but adjust non-zero values, keeping track of the previous one.
list2 = []
prev = None
for curr in list1:
if curr:
if prev:
curr -= prev
prev = None
else:
prev = curr
curr = 0
list2.append(curr)

Python: Boundary points of a non-convex grid

I have the following situation as shown in the Figure below:
I want to find out the grid points that surround the red points. The red points are trajectories of moving agents. So in many situations we have a bunch of points, therefore the solution should be as fast as possible.
The grid is plotted as points.
First step, I managed to reduce the number of grid points as shown below (plotted as x):
This is my code:
step = .5
gridX, gridY = np.meshgrid(np.arange(xmin-step, xmax+step, step), np.arange(ymin-step, ymax+step, step))
mask = False * np.empty_like(gridX, dtype=bool)
threshold = 0.5
for (x,y) in zip(df_traj['X'], df_traj['Y']):
pX = x * np.ones_like(gridX)
pY = y * np.ones_like(gridY)
distX = (pX - gridX)**2
distY = (pY - gridY)**2
dist = np.sqrt(distX + distY)
condition = (dist < threshold)
mask = mask | condition
gX = gridX*mask
gY = gridY*mask
Second step, and this is where I need a little help:
How can I efficiently filter out the inner points of the grid and keep only the "x-points" outside the "red area"?
EDIT
In this special case I have 92450 red points.
I think if you just walk around the edge, since its a evenly spaced grid, it should work. No need for far more complicated non-convex-hull to handle pnts that can be anywhere. This isn't adapted to your code and I cheat with my data structures to make the code easy so youll have to handle that but it think as psuedocode it should work.
pnts = <<lists of points>>
edge_pnts = []
fpnt = pnt_with_min_x_then_min_y
cpnt = fpnt
npnt = None
while npnt != fpnt:
if (cpnt[0] + 1, cpnt[1] ) in pnts: npnt = (cpnt[0] + 1, cpnt[1] )
elif (cpnt[0] + 1, cpnt[1] + 1) in pnts: npnt = (cpnt[0] + 1, cpnt[1] + 1)
elif (cpnt[0], cpnt[1] + 1) in pnts: npnt = (cpnt[0] , cpnt[1] + 1)
elif (cpnt[0] - 1, cpnt[1] + 1) in pnts: npnt = (cpnt[0] - 1, cpnt[1] + 1)
elif (cpnt[0] - 1, cpnt[1] ) in pnts: npnt = (cpnt[0] - 1, cpnt[1] )
elif (cpnt[0] - 1, cpnt[1] - 1) in pnts: npnt = (cpnt[0] - 1, cpnt[1] - 1)
elif (cpnt[0] , cpnt[1] - 1) in pnts: npnt = (cpnt[0] , cpnt[1] - 1)
elif (cpnt[0] + 1, cpnt[1] - 1) in pnts: npnt = (cpnt[0] + 1, cpnt[1] - 1)
else: raise ValueError("Oh no!")
edge_pnts.append(npnt)
cpnt = npnt
For non convex polygons, like your example, convex hull is not a solution. My recommendation is that, given you already have a discrete grid, that you simply attribute the value False to a bool grid cell when a sample occurs inside. Something like this:
import numpy as np
import matplotlib.pyplot as plt
# Generic data production
X, Y = np.random.normal(0, 1, 100000), np.random.normal(0, 1, 100000)
ind = np.where((X > 0) & (Y > 0))
X[ind] = 0
Y[ind] = 0
# Generic grid definition
step = 0.5
xmin, xmax = X.min(), X.max()
ymin, ymax = Y.min(), Y.max()
firstx = xmin-step/2
firsty = ymin-step/2
lastx = xmax+2*step/2
lasty = ymax+2*step/2
gridX, gridY = np.meshgrid(np.arange(firstx, lastx, step), np.arange(firsty, lasty, step))
# This is the actual code that computes inside or outside
bool_grid = np.ones(gridX.shape, dtype="bool")
bool_grid[np.int_(0.5+(Y-firsty)/step), np.int_(0.5+(X-firstx)/step)] = False
# Plot code
plt.scatter(gridX.flatten(), gridY.flatten(), marker="+", color="black", alpha=0.3)
plt.scatter(gridX[bool_grid].flatten(), gridY[bool_grid].flatten(), marker="+", s=90, color="green")
plt.scatter(X, Y, s=10, color="red")
plt.show()
, which results in the following (green crosses are True values):
NOTE: This is very fast but it has some limitations. If your data is not compact you'll have True values inside the shape (so holes are possible). It's possible to process the image to remove the holes however (a flood fill or a moving window based algorithm for example). Another possibility is to play with the resolution of the grid.
You just need to pick a point you know is on the hull (let's take the leftmost point among the topmost points), and assume you "got to it" from above (as we know there is no points above it).
now while the next point is not in your list:
Try going CCW from the direction you came from.
The code looks like that:
matrix = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
# Find the leftmost topmost point
first_point = None
for i in range(len(matrix)):
if first_point:
break
for j in range(len(matrix[0])):
if matrix[i][j]:
first_point = [i, j]
break
next_point = first_point
prev_direction = 'up'
next_direction_dict = {'up': 'left', 'left': 'down', 'down': 'right', 'right': 'up'}
opposite_direction = {'up': 'down', 'left': 'right', 'down': 'up', 'right': 'left'}
hull_points = []
def go_direction(point, direction):
# Find the point to a given direction of a given point
i = point[0]
j = point[1]
if direction == 'right':
j += 1
elif direction == 'up':
i -= 1
elif direction == 'left':
j -= 1
elif direction == 'down':
i += 1
else:
raise ValueError
return [i, j]
def find_next_point(matrix, point, prev_direction):
next_direction = next_direction_dict[prev_direction]
next_point = go_direction(point, next_direction)
prev_direction = next_direction
while not matrix[next_point[0]][next_point[1]]:
next_direction = next_direction_dict[prev_direction]
next_point = go_direction(point, next_direction)
prev_direction = next_direction
from_direction = opposite_direction[prev_direction]
return next_point, from_direction
next_point, prev_direction = find_next_point(matrix, next_point, prev_direction)
while next_point != first_point:
if next_point not in hull_points:
hull_points.append(next_point)
next_point, prev_direction = find_next_point(matrix, next_point, prev_direction)
Edit:
Now also handles single point 'tentacles' by iterating until returning to the first point
Heres another though that came to my mind --
A flood fill of the space:
pnts = <<lists of points>>
seen = set()
edges = []
stack = (0,0)
while stack:
ele = stack.pop()
if ele in pnts:
edges.append(ele)
else:
seen.add(ele)
if (ele[0] + 1, ele[1]) not in seen:
stack.append(ele[0] + 1, ele[1])
if (ele[0] - 1, ele[1]) not in seen:
stack.append(ele[0] - 1, ele[1])
if (ele[0], ele[1] + 1) not in seen:
stack.append(ele[0], ele[1] + 1)
if (ele[0], ele[1] - 1) not in seen:
stack.append(ele[0], ele[1] - 1)
Then you need to sort the points which shouldn't be too hard.

Computing Adjusted Rand Index

I am trying to compute the ARI between two sets of clusters, using this code:
#computes ARI for this type of clustering
def ARI(table,n):
index = 0
sum_a = 0
sum_b = 0
for i in range(len(table)-1):
for j in range(len(table)-1):
sum_a += choose(table[i][len(table)-1],2)
sum_b += choose(table[len(table)-1][j],2)
index += choose(table[i][j],2)
expected_index = (sum_a*sum_b)
expected_index = expected_index/choose(n,2)
max_index = (sum_a+sum_b)
max_index = max_index/2
return (index - expected_index)/(max_index-expected_index)
#choose to compute rand
def choose(n,r):
f = math.factorial
if (n-r)>=0:
return f(n) // f(r) // f(n-r)
else:
return 0
assuming I have created the contingency table correctly, I still get values outside the range of (-1,1).
For instance:
Contingency table:
[1, 0, 0, 0, 0, 0, 0, 1]
[1, 0, 0, 0, 0, 0, 0, 1]
[0, 0, 0, 1, 0, 0, 0, 1]
[0, 1, 0, 0, 0, 0, 0, 1]
[0, 0, 0, 0, 0, 1, 1, 2]
[1, 0, 1, 0, 1, 0, 0, 3]
[0, 0, 0, 0, 0, 0, 1, 1]
[3, 1, 1, 1, 1, 1, 2, 0]
yields an ARI of -1.6470588235294115 when I run my code.
Is there a bug in this code?
Also Here is how I am computing the contingency matrix:
table = [[0 for _ in range(len(subjects)+1)]for _ in range(len(subjects)+1)]
#comparing all clusters
for i in range(len(clusters)):
index_count = 0
for subject, orgininsts in orig_clusters.items():
madeinsts = clusters[i].instances
intersect_count = 0
#comparing all instances between the 2 clusters
for orginst in orgininsts:
for madeinst in makeinsts:
if orginst == madeinst:
intersect_count += 1
table[index_count][i] = intersect_count
index_count += 1
for i in range(len(table)-1):
a = 0
b = 0
for j in range(len(table)-1):
a += table[i][j]
b += table[j][i]
table[i][len(table)-1] = a
table[len(table)-1][i] = b
clusters is a list of cluster objects that have attribute instances, which is a list of instances contained in that cluster. orig_clusters is a dictonary with keys representing cluster labels, and values are a list of instances contained in that cluster. Is there a bug in this code?
You make some mistakes calculating the ARI in your code -- you calculate a and b too often because you loop over your table twice instead of just once.
Also, you pass n as a parameter, but apparently it is set to 10 (that is how I get your result). It would be easier to just pass the table and then calculate n from there. I fixed your code a bit:
def ARI(table):
index = 0
sum_a = 0
sum_b = 0
n = sum([sum(subrow) for subrow in table]) #all items summed
for i in range(len(table)):
b_row = 0#this is to hold the col sums
for j in range(len(table)):
index += choose(table[i][j], 2)
b_row += table[j][i]
#outside of j-loop b.c. we want to use a=rowsums, b=colsums
sum_a += choose(sum(table[i]), 2)
sum_b += choose(b_row, 2)
expected_index = (sum_a*sum_b)
expected_index = expected_index/choose(n,2)
max_index = (sum_a+sum_b)
max_index = max_index/2
return (index - expected_index)/(max_index-expected_index)
or if you pass on the table with row- and column sums:
def ARI(table):
index = 0
sum_a = 0
sum_b = 0
n = sum(table[len(table)-1]) + sum([table[i][len(table)-1] for i in range(len(table)-1)])
for i in range(len(table)-1):
sum_a += choose(table[i][len(table)-1],2)
sum_b += choose(table[len(table)-1][i],2)
for j in range(len(table)-1):
index += choose(table[i][j],2)
expected_index = (sum_a*sum_b)
expected_index = expected_index/choose(n,2)
max_index = (sum_a+sum_b)
max_index = max_index/2
return (index - expected_index)/(max_index-expected_index)
then
def choose(n,r):
f = math.factorial
if (n-r)>=0:
return f(n) // f(r) // f(n-r)
else:
return 0
table = [[1, 0, 0, 0, 0, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 1, 0, 0, 0, 1],
[0, 1, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 1, 1, 2],
[1, 0, 1, 0, 1, 0, 0, 3],
[0, 0, 0, 0, 0, 0, 1, 1],
[3, 1, 1, 1, 1, 1, 2, 0]]
ARI(table)
ARI(table)
Out[56]: -0.0604008667388949
The correct result!

Categories