Subregions of boolean 2d array - python

Say i have a 2 dimensional boolean array. I would like to get a list of slices/or alike, where each slice represents the least (in size) subregions of the array contanining True values while its border contains all False.
I could loop for each row and column and store the indices when such condition is met, but i wonder if you know another way or a library that does this efficiently? You can assume that the boundary of the original boolean array is always False/0.
Example 1
Example 2
Edit ! Added new examples with the correct solutions. Sorry for the confusion.

That's connected component analysis, which has been asked and answered before. Adapting the accepted answer from there to your needs, a possible solution is quite short:
import numpy as np
from scipy.ndimage.measurements import label
def analysis(array):
labeled, _ = label(array, np.ones((3, 3), dtype=np.int))
for i in np.arange(1, np.max(labeled)+1):
pixels = np.array(np.where(labeled == i))
x1 = np.min(pixels[1, :])
x2 = np.max(pixels[1, :])
y1 = np.min(pixels[0, :])
y2 = np.max(pixels[0, :])
print(str(i) + ' | slice: array[' + str(y1) + ':' + str(y2) + ', ' + str(x1) + ':' + str(x2) + ']')
example1 = np.array([
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
]).astype(bool)
example2 = np.array([
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 0, 1, 0, 0, 1, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
]).astype(bool)
for a in [example1, example2]:
print(a, '\n')
analysis(a)
print('\n')
That's the output (without the examples):
[[...]]
1 | slice: array[1:2, 3:5]
2 | slice: array[4:6, 6:8]
3 | slice: array[8:8, 2:2]
[[...]]
1 | slice: array[1:3, 5:8]
2 | slice: array[2:2, 3:3]
3 | slice: array[4:5, 1:1]
4 | slice: array[5:8, 3:6]
5 | slice: array[6:6, 8:8]
6 | slice: array[8:8, 8:8]
Hope that helps!
------------------
System information
------------------
Python: 3.8.1
SciPy: 1.4.1
------------------

You can approach the problem from a graph perspective, with the coordinates of the ones being the graph elements, 8-way connected - then you just have to find the connected components in the graph. If the data is sparse, this should be quite faster than looping through possible square sizes. This an example of how it could work:
from itertools import combinations
def find_squares(a):
# Find ones
ones = [(i, j) for i, row in enumerate(a) for j, val in enumerate(row) if val]
# Make graph of connected ones
graph = {a: [] for a in ones}
for a, b in combinations(ones, 2):
if abs(a[0] - b[0]) <= 1 and abs(a[1] - b[1]) <= 1:
graph[a].append(b)
graph[b].append(a)
# Find connected components in graph
components = []
for a, a_neigh in graph.items():
if any(a in c for c in components):
continue
component = {a, *a_neigh}
pending = [*a_neigh]
visited = {a}
while pending:
b = pending.pop()
if b in visited:
continue
visited.add(b)
component.add(b)
b_neigh = graph[b]
component.update(b_neigh)
pending.extend(c for c in b_neigh if c not in visited)
components.append(component)
# Find bounds for each component
bounds = [((min(a[0] for a in c), min(a[1] for a in c)),
(max(a[0] for a in c), max(a[1] for a in c)))
for c in components]
return bounds
a = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 0, 1, 0, 0, 1, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
square_bounds = find_squares(a)
print(*square_bounds, sep='\n')
# ((1, 5), (3, 8))
# ((2, 3), (2, 3))
# ((4, 1), (5, 1))
# ((5, 3), (8, 6))
# ((6, 8), (6, 8))
# ((8, 8), (8, 8))

Related

optimizing my Battleship field validator 4x4, and fix error

I'm a new to stackoverflow, recently I'm trying to solve a problem from codewar with Python 3x and it's about validating battleship location on a 10x10 grid. I passed 33 tests and failed 17 one, which is pretty bad, plus I sometimes get timeout error, so I figured there must be something wrong with my code, I'm open to listen to any opinion, can somebody help me?
def validate_battlefield(field):
# important list comprehension !!!! to add element in a sublist infront and behind
field = [[0] + k + [0] for k in field]
extended_field = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
extended_field.extend(field)
extended_field.extend([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
field = extended_field
# print(extended_field)
list_of_battleship = []
for h in range(1, 11):
for w in range(1, 11):
if field[h][w] == 1:
m, n = h, w
print("*****************")
print(h, n)
length_ship = 1
field[m][n] = -1
if field[h + 1][w] == 0 and field[h][w + 1] == 0:
list_of_battleship.append(1)
break
while field[m][n + 1] + field[m + 1][n] + field[m][n-1] != 2:
if field[m][n + 1] == 1:
print(m, n + 1)
length_ship += 1
field[m][n + 1] = -1
n += 1
if field[m][n + 1] == 0:
list_of_battleship.append(length_ship)
break
if field[m + 1][n] == 1:
print(m + 1, n)
length_ship += 1
field[m + 1][n] = -1
m += 1
if field[m + 1][n] == 0:
list_of_battleship.append(length_ship)
break
list_of_battleship.sort()
if list_of_battleship == [1, 1, 1, 1, 2, 2, 2, 3, 3, 4]:
return True
else:
return False
I can't get the following validation right, I really don't know where went wrong
print(validate_battlefield([
[1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0, 0, 1, 0],
[1, 0, 1, 0, 1, 1, 1, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],]
))
print(validate_battlefield(
[
[0, 0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 1, 1, 1, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 1, 1, 1, 0, 0, 0, 0]
]
))

Looping and counting python 2d arrays

I have array like this:
[
[1, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0]
]
I want to count value every 3 array, so the result i expected is:
[
[3, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 3, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 2, 1, 0, 0, 0, 0, 0]
]
I have no idea to loop it.
UPDATE..
this problem was solved. Thank you. I try Shijith's code this
if len(arr)%3==0:
print([[sum(y) for y in zip(arr[x],arr[x+1],arr[x+2])] for x in range(0, len(arr),3)])
Try:
result = []
for i in range(int(len(a)/3)):
result.append(np.sum(a[i*3:i*3+3], axis=0))
[array([3, 0, 0, 0, 0, 0, 0, 0, 0]),
array([0, 3, 0, 0, 0, 0, 0, 0, 0]),
array([0, 0, 2, 1, 0, 0, 0, 0, 0])]
you can use numpy.sum() along axis=0 , for every three rows in your array.
import numpy as np
if len(arr)%3==0:
print(np.array([np.sum(arr[x:x+3], axis = 0) for x in range(0, len(arr),3) ]))
[[3 0 0 0 0 0 0 0 0]
[0 3 0 0 0 0 0 0 0]
[0 0 2 1 0 0 0 0 0]]
or use simple list comprehension,
if len(arr)%3==0:
print([[sum(y) for y in zip(arr[x],arr[x+1],arr[x+2])] for x in range(0, len(arr),3)])
[[3, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 3, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 2, 1, 0, 0, 0, 0, 0]]
You can do this in pandas like this ( It splits the rows into 3, then takes the sum of each set of rows) :
import pandas as pd
import numpy as np
df=pd.DataFrame([
[1, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0]
])
df.groupby(np.arange(len(df))//3).sum().to_numpy().tolist()
output:
[[3, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 3, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 2, 1, 0, 0, 0, 0, 0]]
For a pure non import way:
combine=[]
for x in range(3):
combine.append(list(sum((a[x*3:x*3+3]))))
list(combine)
output:
[[3, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 3, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 2, 1, 0, 0, 0, 0, 0]]
result = [[sum(three) for three in
zip(arr[first], arr[first + 1], arr[first + 2])]
for first in range(0, len(array)-len(array)%3, 3)]
print(result)
Output
[[3, 0, 0, 0, 0, 0, 0, 0, 0], [0, 3, 0, 0, 0, 0, 0, 0, 0], [0, 0, 2, 1, 0, 0, 0, 0, 0]]

How to set a probability of a value becoming a zero for an np.array?

I've got an np.array 219 by 219 with mostly 0s and 2% of nonzeros and I know want to create new arrays where each of the nonzero values has 90% of chance of becoming a zero.
I now know how to change the n-th non zero value to 0 but how to work with probabilities?
Probably this can be modified:
index=0
for x in range(0, 219):
for y in range(0, 219):
if (index+1) % 10 == 0:
B[x][y] = 0
index+=1
print(B)
You could use np.random.random to create an array of random numbers to compare with 0.9, and then use np.where to select either the original value or 0. Since each draw is independent, it doesn't matter if we replace a 0 with a 0, so we don't need to treat zero and nonzero values differently. For example:
In [184]: A = np.random.randint(0, 2, (8,8))
In [185]: A
Out[185]:
array([[1, 1, 1, 0, 0, 0, 0, 1],
[1, 1, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 0, 1, 0, 0, 0, 1],
[0, 1, 0, 1, 1, 1, 1, 0],
[1, 1, 0, 1, 1, 0, 0, 0],
[1, 0, 0, 1, 0, 0, 1, 0],
[1, 1, 0, 0, 0, 1, 0, 1]])
In [186]: np.where(np.random.random(A.shape) < 0.9, 0, A)
Out[186]:
array([[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0]])
# first method
prob=0.3
print(np.random.choice([2,5], (5,), p=[prob,1-prob]))
# second method (i prefer)
import random
import numpy as np
def randomZerosOnes(a,b, N, prob):
if prob > 1-prob:
n1=int((1-prob)*N)
n0=N-n1
else:
n0=int(prob*N)
n1=N-n0
zo=np.concatenate(([a for _ in range(n0)] ,[b for _ in range(n1)] ), axis=0 )
random.shuffle(zo)
return zo
zo=randomZerosOnes(2,5, N=5, prob=0.3)
print(zo)

Drawing a directed graph using a link matrix with networkx

I am working on pagerank for a school project, and i have a matrix where the row "i" represent the links from the site j (line) to the site i. (If it is still unclear i'll explain more).
The current part is:
Z=[[0,1,1,1,1,0,1,0,0,0,0,0,0,0],[1,0,0,0,1,0,0,0,0,0,0,0,0,0], [1,1,0,0,0,0,0,0,0,0,0,0,0,0],[1,0,1,0,0,0,0,0,0,0,0,0,0,0],[1,0,0,1,0,0,0,0,0,0,0,0,0,0],[1,0,0,0,0,0,0,1,0,1,0,0,0,0],[0,0,0,0,0,1,0,0,0,0,0,0,0,0],[0,0,0,0,0,1,1,0,1,0,0,0,0,0],[0,0,0,0,0,1,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,1,0,1,1,1,1],[0,0,0,0,0,0,0,0,0,1,0,0,0,1],[0,0,0,0,0,0,0,0,0,1,1,0,0,0],[0,0,0,0,0,0,0,0,0,1,0,1,0,0],[0,0,0,0,0,0,0,0,0,1,0,0,1,0]]
A=np.matrix(Z)
G=nx.from_numpy_matrix(A,create_using=nx.MultiDiGraph())
pos=nx.circular_layout(G)
labels={}
for i in range (N):
labels[i]=i+1
nx.draw_circular(G)
nx.draw_networkx_labels(G,pos,labels,font_size=15)
The problem i have is that the labels are not where they are supposed to be, it seems that networkx is just placing them clockwise...
Also, how could i easily direct the graph, so that a link from j to i won't be from i to j?
Thanks!
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
Z = [[0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]]
G = nx.from_numpy_matrix(np.array(Z), create_using=nx.MultiDiGraph())
pos = nx.circular_layout(G)
nx.draw_circular(G)
labels = {i : i + 1 for i in G.nodes()}
nx.draw_networkx_labels(G, pos, labels, font_size=15)
plt.show()
yields
This result appears correct to me. Notice, for example, that the node labeled 1 has directed edges pointing to 2, 3, 4, 5 and 7. This corresponds to the ones on the first row in the array, Z[0]:
[0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0]
since the first row corresponds to node 1, and the ones in this row occur in the columns corresponding to nodes 2, 3, 4, 5 and 7.

Longest Common Sequence -Index Error

I am trying to find the LCS between two sequences: TACGCTGGTACTGGCAT and AGCTGGTCAGAA. I want my answer to output as a matrix so that I can backtrack which sequence is common (GCTGGT). When I use my code below, I am getting the following error. IndexError: list index out of range. How can I avoid this error in my code below?
def LCS(x, y):
m = len(x)
n = len(y)
C = []
for i in range(m):
for j in range(n):
if x[i] == y[j]:
C[i][j] == C[i-1][j-1] + 1
else:
C[i][j] == 0
return C
x = "TACGCTGGTACTGGCAT"
y = "AGCTGGTCAGAA"
m = len(x)
n = len(y)
C = LCS(x, y)
print C
You need to append to your list, because the index [i][j] does not exist yet.
def LCS(x, y):
m = len(x)
n = len(y)
C = []
for i in range(m):
C.append([]) # append a blank list at index [i]
for j in range(n):
if x[i] == y[j]:
C[i].append(C[i-1][j-1] + 1) # append current element to [i]
else:
C[i].append(0)
return C
Testing
x = "TACGCTGGTACTGGCAT"
y = "AGCTGGTCAGAA"
LCS(x,y)
Output
[[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1],
[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0],
[0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 4, 1, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 5, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 1, 0, 0, 6, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1],
[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 3, 1, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 4, 0, 0, 0, 1, 0, 0],
[0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 1],
[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0]]

Categories