Related
def riverSizes(matrix):
rows, cols = len(matrix), len(matrix[0])
visited = set()
res = []
def bfs(row, col, width):
max_width = width
directions = [(0, 1), (1, 0), (-1, 0), (0, -1)]
for dr, dc in directions:
r, c = row + dr, col + dc
if (r,c) not in visited and r < rows and c < cols and r >= 0 and c >=0 and matrix[r][c] == 1:
visited.add((r,c))
max_width = max(bfs(r, c, width + 1), max_width)
print(max_width)
return max_width
for r in range(rows):
for c in range(cols):
if matrix[r][c] == 1 and (r, c) not in visited:
visited.add((r, c))
val = bfs(r, c, 1)
res.append(val)
return res
Input:
[[1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0],
[1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0],
[0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1],
[1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0],
[1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1]]
My output: [2, 1, 15, 5, 2, 1]
Expected output: [2, 1, 21, 5, 2, 1]
I am concerned that in the case where my recusion branches out in multiple directions, it isn't adding all the additional widths together.
I was helped by friend who corrected me that my approach is actually Depth First Search. I was mistakenly using the max function when instead all I needed to do was increment the width and return the width.
def riverSizes(matrix):
rows, cols = len(matrix), len(matrix[0])
visited = set()
res = []
def dfs(row, col, width):
directions = [(0, 1), (1, 0), (-1, 0), (0, -1)]
for dr, dc in directions:
r, c = row + dr, col + dc
if (r,c) not in visited and r < rows and c < cols and r >= 0 and c >=0 and matrix[r][c] == 1:
visited.add((r,c))
width = dfs(r, c, width + 1)
return width
for r in range(rows):
for c in range(cols):
if matrix[r][c] == 1 and (r, c) not in visited:
visited.add((r, c))
val = dfs(r, c, 1)
res.append(val)
return res
I'm trying to look into the A* Algorithm but I'm kind of having a hard time understanding a specific part. So the A* Algorithm Python Code with the example is this:
class Node():
"""A node class for A* Pathfinding"""
def __init__(self, parent=None, position=None):
self.parent = parent
self.position = position
self.g = 0
self.h = 0
self.f = 0
def __eq__(self, other):
return self.position == other.position
def astar(maze, start, end):
"""Returns a list of tuples as a path from the given start to the given end in the given maze"""
# Create start and end node
start_node = Node(None, start)
start_node.g = start_node.h = start_node.f = 0
end_node = Node(None, end)
end_node.g = end_node.h = end_node.f = 0
# Initialize both open and closed list
open_list = []
closed_list = []
# Add the start node
open_list.append(start_node)
# Loop until you find the end
while len(open_list) > 0:
# Get the current node
current_node = open_list[0]
current_index = 0
for index, item in enumerate(open_list):
if item.f < current_node.f:
current_node = item
current_index = index
# Pop current off open list, add to closed list
open_list.pop(current_index)
closed_list.append(current_node)
# Found the goal
if current_node == end_node:
path = []
current = current_node
while current is not None:
path.append(current.position)
current = current.parent
return path[::-1] # Return reversed path
# Generate children
children = []
for new_position in [(0, -1), (0, 1), (-1, 0), (1, 0), (-1, -1), (-1, 1), (1, -1), (1, 1)]: # Adjacent squares
# Get node position
node_position = (current_node.position[0] + new_position[0], current_node.position[1] + new_position[1])
# Make sure within range
if node_position[0] > (len(maze) - 1) or node_position[0] < 0 or node_position[1] > (len(maze[len(maze)-1]) -1) or node_position[1] < 0:
continue
# Make sure walkable terrain
if maze[node_position[0]][node_position[1]] != 0:
continue
# Create new node
new_node = Node(current_node, node_position)
# Append
children.append(new_node)
# Loop through children
for child in children:
# Child is on the closed list
for closed_child in closed_list:
if child == closed_child:
continue
# Create the f, g, and h values
child.g = current_node.g + 1
child.h = ((child.position[0] - end_node.position[0]) ** 2) + ((child.position[1] - end_node.position[1]) ** 2)
child.f = child.g + child.h
# Child is already in the open list
for open_node in open_list:
if child == open_node and child.g > open_node.g:
continue
# Add the child to the open list
open_list.append(child)
def main():
maze = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]]
start = (4, 3)
end = (4, 5)
path = astar(maze, start, end)
print(path)
if __name__ == '__main__':
main()
In the
for index, item in enumerate(open_list):
if item.f < current_node.f:
current_node = item
current_index = index
I don't get how the current_node can be defined as the item in the maze I've given above. In the example I've given above, the start = (4,3) and end = (4,5), giving the only possible shortest distance would be as something like the following:
maze = [[0, 0, 0, 0, *, 0, 0, 0, 0, 0],
[0, 0, 0, *, 1, *, 0, 0, 0, 0],
[0, 0, 0, *, 1, *, 0, 0, 0, 0],
[0, 0, 0, *, 1, *, 0, 0, 0, 0],
[0, 0, 0, s, 1, e, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]]
with the s being the start_node and e being the end_node.
However, in the code of the A* Algorithm, the current_node becomes the item only if the item.f is smaller than the current_node.f. In the example I've given here, I can't see that the first * would have an f value smaller than the f value of the start_node - I mean, in the code, we already have described the start_node.f = 0 haven't we? And we defined the first current_node as the start_node... so no item in the open_list would have an item.f value smaller than zero..
How is this possible?? Or am I missing something here??
I think the clue is that you have to take into account the two lines above this for loop as well:
# Get the current node
current_node = open_list[0]
current_index = 0
for index, item in enumerate(open_list):
if item.f < current_node.f:
current_node = item
current_index = index
What happens:
In the first iteration of your while loop:
There is only one item in the open_list, being the start_node where indeed f=0
So after the above code block, this start node becomes the current_node
Right after the above loop the start_node is removed from the open_list: open_list.pop(current_index)
The open_list is then populated by the valid neighbouring locations (by walking its children)
In the second iteration of your while loop:
The above code block looks for the item in the open_list with the lowest f value
because of the first line current_node = open_list[0], you will be sure that the new current_node is always one from the open_list.
as the start_node has been removed from the open_list, it will for sure be replaced here
I am trying to compute the ARI between two sets of clusters, using this code:
#computes ARI for this type of clustering
def ARI(table,n):
index = 0
sum_a = 0
sum_b = 0
for i in range(len(table)-1):
for j in range(len(table)-1):
sum_a += choose(table[i][len(table)-1],2)
sum_b += choose(table[len(table)-1][j],2)
index += choose(table[i][j],2)
expected_index = (sum_a*sum_b)
expected_index = expected_index/choose(n,2)
max_index = (sum_a+sum_b)
max_index = max_index/2
return (index - expected_index)/(max_index-expected_index)
#choose to compute rand
def choose(n,r):
f = math.factorial
if (n-r)>=0:
return f(n) // f(r) // f(n-r)
else:
return 0
assuming I have created the contingency table correctly, I still get values outside the range of (-1,1).
For instance:
Contingency table:
[1, 0, 0, 0, 0, 0, 0, 1]
[1, 0, 0, 0, 0, 0, 0, 1]
[0, 0, 0, 1, 0, 0, 0, 1]
[0, 1, 0, 0, 0, 0, 0, 1]
[0, 0, 0, 0, 0, 1, 1, 2]
[1, 0, 1, 0, 1, 0, 0, 3]
[0, 0, 0, 0, 0, 0, 1, 1]
[3, 1, 1, 1, 1, 1, 2, 0]
yields an ARI of -1.6470588235294115 when I run my code.
Is there a bug in this code?
Also Here is how I am computing the contingency matrix:
table = [[0 for _ in range(len(subjects)+1)]for _ in range(len(subjects)+1)]
#comparing all clusters
for i in range(len(clusters)):
index_count = 0
for subject, orgininsts in orig_clusters.items():
madeinsts = clusters[i].instances
intersect_count = 0
#comparing all instances between the 2 clusters
for orginst in orgininsts:
for madeinst in makeinsts:
if orginst == madeinst:
intersect_count += 1
table[index_count][i] = intersect_count
index_count += 1
for i in range(len(table)-1):
a = 0
b = 0
for j in range(len(table)-1):
a += table[i][j]
b += table[j][i]
table[i][len(table)-1] = a
table[len(table)-1][i] = b
clusters is a list of cluster objects that have attribute instances, which is a list of instances contained in that cluster. orig_clusters is a dictonary with keys representing cluster labels, and values are a list of instances contained in that cluster. Is there a bug in this code?
You make some mistakes calculating the ARI in your code -- you calculate a and b too often because you loop over your table twice instead of just once.
Also, you pass n as a parameter, but apparently it is set to 10 (that is how I get your result). It would be easier to just pass the table and then calculate n from there. I fixed your code a bit:
def ARI(table):
index = 0
sum_a = 0
sum_b = 0
n = sum([sum(subrow) for subrow in table]) #all items summed
for i in range(len(table)):
b_row = 0#this is to hold the col sums
for j in range(len(table)):
index += choose(table[i][j], 2)
b_row += table[j][i]
#outside of j-loop b.c. we want to use a=rowsums, b=colsums
sum_a += choose(sum(table[i]), 2)
sum_b += choose(b_row, 2)
expected_index = (sum_a*sum_b)
expected_index = expected_index/choose(n,2)
max_index = (sum_a+sum_b)
max_index = max_index/2
return (index - expected_index)/(max_index-expected_index)
or if you pass on the table with row- and column sums:
def ARI(table):
index = 0
sum_a = 0
sum_b = 0
n = sum(table[len(table)-1]) + sum([table[i][len(table)-1] for i in range(len(table)-1)])
for i in range(len(table)-1):
sum_a += choose(table[i][len(table)-1],2)
sum_b += choose(table[len(table)-1][i],2)
for j in range(len(table)-1):
index += choose(table[i][j],2)
expected_index = (sum_a*sum_b)
expected_index = expected_index/choose(n,2)
max_index = (sum_a+sum_b)
max_index = max_index/2
return (index - expected_index)/(max_index-expected_index)
then
def choose(n,r):
f = math.factorial
if (n-r)>=0:
return f(n) // f(r) // f(n-r)
else:
return 0
table = [[1, 0, 0, 0, 0, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 1, 0, 0, 0, 1],
[0, 1, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 1, 1, 2],
[1, 0, 1, 0, 1, 0, 0, 3],
[0, 0, 0, 0, 0, 0, 1, 1],
[3, 1, 1, 1, 1, 1, 2, 0]]
ARI(table)
ARI(table)
Out[56]: -0.0604008667388949
The correct result!
I have a data set containing with only 0 and 1. I want to have a detector to find where 1 starts and where 1 ends, and then return something related to their index to a different list each. So I've written some codes as below:
n= [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
def detector (data):
x = 0
start = []
end = []
for index, i in enumerate(data):
if x == 0 and i == 1:
start.append((index+1))
x == 1
elif x == 1 and i==0:
end.append((index))
x == 0
return start, end
print (detector(n))
However when I run the code above, it returned like below, which is not my desired output.
([1, 2, 3, 4, 22, 23, 24, 25, 26, 27, 28, 34, 35, 36, 37, 38], [])
My desired output is as below:
([1, 22, 34], [4,28,38])
As you can see above, the start_time should be[1,22,34] and end_time should be [4,28,38].
If anyone knows how to solve the issue, pls let me know. Appreciated!!
One issue is certainly, that you dont change flag.
== is a comparison operator and does not assign a new value to flag
using enumerate to get positions of 1s and zip to find when sequence of consecutive 1s starts/ends
ones_positions = [position
for position, value in enumerate(n)
if value == 1]
ones_starts = [ones_positions[0]] + [
next_position
for position, next_position in zip(ones_positions,
ones_positions[1:])
if next_position - position > 1]
ones_ends = [position
for position, next_position in zip(ones_positions,
ones_positions[1:])
if next_position - position > 1] + [ones_positions[-1]]
gives us
>>>ones_starts
[0, 21, 33]
>>>ones_ends
[3, 27, 37]
we can specify enumerate's start parameter if you want your indices to start from 1 (when they are naturally start from 0)
ones_positions = [position
for position, value in enumerate(n, start=1)
if value == 1]
after that
>>>ones_starts
[1, 22, 34]
>>>ones_ends
[4, 28, 38]
Finally we can write it as function:
def detector(data, target_value=1):
positions = [position
for position, value in enumerate(data, start=1)
if value == target_value]
start_times = [positions[0]] + [
next_position
for position, next_position in zip(positions,
positions[1:])
if next_position - position > 1]
end_times = [position
for position, next_position in zip(positions,
positions[1:])
if next_position - position > 1] + [positions[-1]]
return start_times, end_times
and test
n = [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
print(detector(n))
gives us
([1, 22, 34], [4, 28, 38])
n = [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
prev_num = 0
starts = []
ends = []
result = (starts, ends)
for idx, num in enumerate(n):
if prev_num == 0 and num == 1:
starts.append(idx + 1)
elif prev_num == 1 and num == 0:
ends.append(idx + 1)
elif num == 1 and idx == (len(n) - 1):
ends.append(idx + 1)
prev_num = num
print(result)
Which prints:
[[1, 22, 34], [5, 29, 38]]
Since #DanielChristiany pointed you where your mistake was. I will present you my solution which is faster than any of presented(at least that works correctly):
edges = (index for index, i in enumerate(n[1:], 1) if i != n[index-1])
if n[0] == 1:
edges = (1, *edges)
if n[-1] == 1:
some = (*edges, len(n))
print(edges[::2], edges[1::2])
Basically it firstly searches edges where element changes from 0 to 1 or from 1 to 0. Then checks if first and last elements are 1 and then print result.
This solution also uses less memory since it uses generators.
You could also try using groupby:
import itertools
L = [[y[0] for y in it]
for x,it in
itertools.groupby(enumerate(n),lambda x: x[1])
][::2]
res = [x[0] for x in L],[x[-1] for x in L]
You could probably arrive at an even more correct solution without using indexes.
Thanks to vishes_shell for the correction
A rectangle is defined as any rectangular-shaped section of zeros within a 2-d array of 1s and 0s. Typical example:
[
[1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 0],
[1, 1, 1, 0, 0, 0, 1, 0, 0],
[1, 0, 1, 0, 0, 0, 1, 0, 0],
[1, 0, 1, 1, 1, 1, 1, 1, 1],
[1, 0, 1, 0, 0, 1, 1, 1, 1],
[1, 1, 1, 0, 0, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1],
]
In this example, there are three such arrays:
My goal is to determine the coordinates (outer 3 extremeties) of each array.
I start by converting the 2-d list into a numpy array:
image_as_np_array = np.array(two_d_list)
I can then get the coordinates of all the zeros thus:
np.argwhere(image_as_np_array == 0)
But this merely provides a shortcut to getting the indices by iterating over each row and calling .index(), then combining with the index of that row within the 2-d list.
I envisage now doing something like removing any element of np.argwhere() (or np.where()) where there is only a single occurrence of a 0 (effectively disregarding any row that cannot form part of a rectangle), and then trying to align contiguous coordinates, but I'm stuck with figuring out how to handle cases where any row may contain part of more than just one single rectangle (as is the case in the 3rd and 4th rows above). Is there a numpy function or functions I can leverage?
I don't know numpy, so here's a plain Python solution:
from collections import namedtuple
Rectangle = namedtuple("Rectangle", "top bottom left right")
def find_rectangles(arr):
# Deeply copy the array so that it can be modified safely
arr = [row[:] for row in arr]
rectangles = []
for top, row in enumerate(arr):
start = 0
# Look for rectangles whose top row is here
while True:
try:
left = row.index(0, start)
except ValueError:
break
# Set start to one past the last 0 in the contiguous line of 0s
try:
start = row.index(1, left)
except ValueError:
start = len(row)
right = start - 1
if ( # Width == 1
left == right or
# There are 0s above
top > 0 and not all(arr[top-1][left:right + 1])):
continue
bottom = top + 1
while (bottom < len(arr) and
# No extra zeroes on the sides
(left == 0 or arr[bottom][left-1]) and
(right == len(row) - 1 or arr[bottom][right + 1]) and
# All zeroes in the row
not any(arr[bottom][left:right + 1])):
bottom += 1
# The loop ends when bottom has gone too far, so backtrack
bottom -= 1
if ( # Height == 1
bottom == top or
# There are 0s beneath
(bottom < len(arr) - 1 and
not all(arr[bottom + 1][left:right+1]))):
continue
rectangles.append(Rectangle(top, bottom, left, right))
# Remove the rectangle so that it doesn't affect future searches
for i in range(top, bottom+1):
arr[i][left:right+1] = [1] * (right + 1 - left)
return rectangles
For the given input, the output is:
[Rectangle(top=2, bottom=3, left=3, right=5),
Rectangle(top=5, bottom=6, left=3, right=4)]
This is correct because the comments indicate that the 'rectangle' on the right is not to be counted since there is an extra 0 sticking out. I suggest you add more test cases though.
I expect it to be reasonably fast since much of the low-level iteration is done with calls to index and any, so there's decent usage of C code even without the help of numpy.
I have written a simple algorithms using the Sweep line method. The idea is that You go through the columns of You array column by column, and detect the series of zeros as potentially new rectangles. In each column You have to check if the rectangles detected earlier have ended, and if yes add them to the results.
import numpy as np
from sets import Set
from collections import namedtuple
example = np.array([
[1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 0],
[1, 1, 1, 0, 0, 0, 1, 0, 0],
[1, 0, 1, 0, 0, 0, 1, 0, 0],
[1, 0, 1, 1, 1, 1, 1, 1, 1],
[1, 0, 1, 0, 0, 1, 1, 1, 1],
[1, 1, 1, 0, 0, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1],
])
Rectangle = namedtuple("Rectangle", "left top bottom right")
def sweep(A):
height = A.shape[0]
length = A.shape[1]
rectangles = dict() # detected rectangles {(rowstart, rowend): col}
result = []
# sweep the matrix column by column
for i in xrange(length):
column = A[:, i]
# for currently detected rectangles check if we should extend them or end
for r in rectangles.keys():
# detect non rectangles shapes like requesten in question edit and del those rectangles
if all([x == 0 for x in column[r[0]:r[1]+1]]) and ((r[0]-1>0 and column[r[0]-1]==0) or (r[1]+1<height and column[r[1]+1]==0)):
del rectangles[r]
elif any([x == 0 for x in column[r[0]:r[1]+1]]) and not all([x == 0 for x in column[r[0]:r[1]+1]]):
del rectangles[r]
# special case in the last column - add detected rectangles
elif i == length - 1 and all([x == 0 for x in column[r[0]:r[1]+1]]):
result.append(Rectangle(rectangles[r], r[0], r[1], i))
# if detected rectangle is not extended - add to result and del from list
elif all([x == 1 for x in column[r[0]:r[1]+1]]):
result.append(Rectangle(rectangles[r], r[0], r[1], i-1))
del rectangles[r]
newRectangle = False
start = 0
# go through the column and check if any new rectangles appear
for j in xrange(height):
# new rectangle in column detected
if column[j] == 0 and not newRectangle and j+1 < height and column[j+1] == 0:
start = j
newRectangle = True
# new rectangle in column ends
elif column[j] == 1 and newRectangle:
# check if new detected rectangle is already on the list
if not (start, j-1) in rectangles:
rectangles[(start, j-1)] = i
newRectangle = False
# delete single column rectangles
resultWithout1ColumnRectangles = []
for r in result:
if r[0] != r[3]:
resultWithout1ColumnRectangles.append(r)
return resultWithout1ColumnRectangles
print example
print sweep(example)
returns:
[[1 1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1 0]
[1 1 1 0 0 0 1 0 0]
[1 0 1 0 0 0 1 0 0]
[1 0 1 1 1 1 1 1 1]
[1 0 1 0 0 1 1 1 1]
[1 1 1 0 0 1 1 1 1]
[1 1 1 1 1 1 1 1 1]]
[Rectangle(left=3, top=5, bottom=6, right=4),
Rectangle(left=3, top=2, bottom=3, right=5)]