Finding biggest negative submatrix in python - python

I want find the biggest submatrix which contains only negative numbers in a matrix, for example:
In
[[1, -9, -2, 8, 6, 1],
[8, -1,-11, -7, 6, 4],
[10, 12, -1, -9, -12, 14],
[8, 10, -3, -5, 17, 8],
[6, 4, 10, -13, -16, 19]]
the biggest submatrix containing only negative numbers is
[[-11, -7],
[-1, -9],
[-3,-5]]
(left upper corner coordinates: 1,2, right lower corner coordinates: 3,3).
What's the most effective way to do it?

A brute force solution. Will work, but may be considered too slow for a bigger matrix:
mOrig = [[1, -9, -2, 8, 6, 1],
[8, -1,-11, -7, 6, 4],
[10, 12, -1, -9, -12, 14],
[8, 10, -3, -5, 17, 8],
[6, 4, 10, -13, -16, 19]]
# reduce the problem
# now we have a matrix that contains only 0 and 1
# at the place where there was a negative number
# there is now a 1 and at the places where a positive
# number had been there is now a 0. 0s are considered
# to be negative numbers, if you want to change this,
# change the x < 0 to x <= 0.
m = [[1 if x < 0 else 0 for x in z] for z in mOrig]
# now we have the problem to find the biggest submatrix
# consisting only 1s.
# first a function that checks if a submatrix only contains 1s
def containsOnly1s(m, x1, y1, x2, y2):
for i in range(x1, x2):
for j in range(y1, y2):
if m[i][j] == 0:
return False
return True
def calculateSize(x1, y1, x2, y2):
return (x2 - x1) * (y2 - y1)
best = (-1, -1, -1, -1, -1)
for x1 in range(len(m)):
for y1 in range(len(m[0])):
for x2 in range(x1, len(m)):
for y2 in range(y1, len(m[0])):
if containsOnly1s(m, x1, y1, x2, y2):
sizeOfSolution = calculateSize(x1, y1, x2, y2)
if best[4] < sizeOfSolution:
best = (x1, y1, x2, y2, sizeOfSolution)
for x in range(best[0], best[2]):
print("\t".join([str(mOrig[x][y]) for y in range(best[1], best[3])]))
Will output
-11 -7
-1 -9
-3 -5
In case something else is meant with "biggest submatrix", the only function that needs to get changed is the following:
def calculateSize(x1, y1, x2, y2):
return (x2 - x1) * (y2 - y1)
which is calculating the size of a submatrix.
Edit 1 ... first speedup
best = (-1, -1, -1, -1, -1)
for x1 in range(len(m)):
for y1 in range(len(m[0])):
if m[x1][y1] == 1: # The starting point must contain a 1
for x2 in range(x1 + 1, len(m)): # actually can start with x1 + 1 here
for y2 in range(y1 + 1, len(m[0])):
if containsOnly1s(m, x1, y1, x2, y2):
sizeOfSolution = calculateSize(x1, y1, x2, y2)
if best[4] < sizeOfSolution:
best = (x1, y1, x2, y2, sizeOfSolution)
else:
# There is at least one 0 in the matrix, so every greater
# matrix will also contain this 0
break
Edit 2
Ok, after converting the matrix into a matrix of 0 and 1 (as I do via the line m = [[1 if x < 0 else 0 for x in z] for z in mOrig] the problem is the same as what is called the maximal rectangle problem in literature. So I googled a bit about known algorithms for this kind of problem and came across this site here http://www.drdobbs.com/database/the-maximal-rectangle-problem/184410529 which is describing a very fast algorithm to solve this kind of problem. To summarize the points of this website, the algorithm is exploiting the structure. This can be done by using a stack in order to remember the structure profile which allows us to recalculate the width in case a narrow rectangle gets reused when an wider one gets closed.

Here is my pretty fast solution using convolutions from OpenCV. Usage of float32 is required as it is much faster than integers. Takes 135 ms for 1000 x 1000 matrix on my 2 cores. There's space for further code optimization though.
import cv2
import numpy as np
data = """1 -9 -2 8 6 1
8 -1 -11 -7 6 4
10 12 -1 -9 -12 14
8 10 -3 -5 17 8
6 4 10 -13 -16 19"""
# matrix = np.random.randint(-128, 128, (1000, 1000), dtype=np.int32)
matrix = np.int32([line.split() for line in data.splitlines()])
def find_max_kernel(matrix, border=cv2.BORDER_ISOLATED):
max_area = 0
mask = np.float32(matrix < 0)
ones = np.ones_like(mask)
conv_x = np.zeros_like(mask)
conv_y = np.zeros_like(mask)
max_h, max_w = mask.shape
for h in range(1, max_h + 1):
cv2.filter2D(mask, -1, ones[:h, None, 0], conv_y, (0, 0), 0, border)
for w in range(1, max_w + 1):
area = h * w
if area > max_area:
cv2.filter2D(conv_y, -1, ones[None, 0, :w], conv_x, (0, 0), 0, border)
if conv_x.max() == area:
max_area, shape = area, (h, w)
else:
if w == 1:
max_h = h - 1
if h == 1:
max_w = w - 1
break
if h >= max_h:
break
cv2.filter2D(mask, -1, np.ones(shape, np.float32), conv_x, (0, 0), 0, border)
p1 = np.array(np.unravel_index(conv_x.argmax(), conv_x.shape))
p2 = p1 + shape - 1
return p1, p2
print(*find_max_kernel(matrix), sep='\n')

Below is a function that executes in much less than a second for a 5000x5000 matrix. It relies only on basic np-functions.
It could be improved by returning the first index instead of all indices. Several other optimizations can be done but for many uses it is fast enough.
from numpy import roll, where
def gidx(X):
Wl = X & roll(X, 1, axis=1)
T = X & Wl & roll(X, -1, axis=1)
if T[1:-1][1:-1].any():
N = T & roll(T, -1, axis=0) & roll(T, 1, axis=0)
if N.any(): return gidx(N)
W = Wl & roll(Wl, 1, axis=0)
if W.any(): return where(W)
return where(X)
#%% Example
import numpy as np
#np.random.seed(0)
M = 100
X = np.random.randn(M, M) - 2
X0 = (X < 0)
X0[[0, -1]], X0[:, [0, -1]] = False, False
jx, kx = gidx(X0)

Related

How do I find the x and y values in an array where the array is divided equally in a cartesian orientation?

What I'm trying to do is simply find and define an x and y coordinate for the highest number in the array.
For example, 50000 would output: x = 2, y = 2. Is there an easy way to accomplish this?
I created this code below:
data_array = [[0, 1, 2, 3, 50000],
[5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24]]
highest_num = data_array[0][0]
x = 0
y = 0
# looping from 0 to len(data_array)-1
for i in range(len(data_array)):
# looping from 0 to len(data_array[i])-1
for j in range(len(data_array[i])):
# checking data_array[x][y] is less than data_array[i][j]
if data_array[x][y] < data_array[i][j]:
# updating x and y
x = i
y = j
highest_num = data_array[i][j]
# printing the values of highest_num, x and y
print("highest_num =", highest_num)
print("x =", x)
print("y =", y)
But I would get x = 0, y = 4. I wanted to reference the middle of the array which is 12 and make the output be x = 2, y = 2.
Can this be accomplished without numpy where? I want the points to track with the max wherever it is.
Modify 2 lines in your program:
print("x =", x-2)
print("y =", y+2)
if you could use numpy do the argmax like this then find the coordinate (row col) and coordinate switch to chartersian (reverse y) then both subtracte the half , (considering it is even in both dimentions.
idx = np.argmax(data_array)
m, n = len(data_array), len(data_array[0])
r, c = m - (idx // n) - 1 , idx % n
y , x = r - (m // 2), c - (n // 2)
if not just use the code your writetn to find maximum index.

Evaluate PDF of multivariate uniform distribution

I would like to compute the density of a point with respect to the Uniform distribution over [-1, 1] x [-1, 1]. I tried this:
import numpy as np
from scipy.stats import uniform
x = np.zeros(2)
uniform(loc=np.array([-1, -1]), scale=np.array([2, 2])).pdf(x)
but this returns a 2D vector rather than a scalar value.
All I could come up with was
0.25 * np.all(np.abs(x) <= 2)
Would be nice to get something faster. This is the best I could do in terms of a general function.
def multivariate_uniform(x, lows, highs):
return np.all((lows <= x) & (x <= highs)) / np.prod(highs - lows)
If you look for something simple, maybe you can try this. You need to fine tune the code to fit your needs.
def multivariate_uniform(x, a, b, c, d):
"""
(a,d) -------- (b,d)
| |
| |
| |
(a,c) -------- (b,c)
"""
y = np.array([a, c])
z = np.array([b, d])
if (x >= y).all() and (x <= z).all():
return 1/(b-a)/(d-c)
else:
return 0
x1 = np.array([0, 0])
print(multivariate_uniform(x1, -1, 1, -1, 1))
x2 = np.array([2, 0])
print(multivariate_uniform(x2, -1, 1, -1, 1))
x3 = np.array([0, 2])
print(multivariate_uniform(x3, -1, 1, -1, 1))
x4 = np.array([-2, 2])
print(multivariate_uniform(x4, -1, 1, -1, 1))
x5 = np.array([-1, 1])
print(multivariate_uniform(x5, -1, 1, -1, 1))
0.25
0
0
0
0.25

Gauss Siedel not converging

I have written this code, which is not converging and runs more iterations than I expect. I expect it to run 17 iterations and it does 24. I am not able to figure out the reason!
import numpy as np
from numpy import *
A = [[10, -1, 2, 0],
[ -1, 11, -1, 3 ],
[ 2, -1, 10, -1 ],
[ 0, 3, -1, 8 ] ]
b = [6, 25, -11, 15]
def GaussSiedelAccelerated(A, b, e, x, w):
e = float(e)
iterations = 0
Epsilon = float()
n = len(A)
condition = True
while condition:
for i in range(n):
s1 = 0
s2 = 0
tempx = x.copy() # Record answer of the previous iteration
for j in range(1,i,1):
s1 = s1 + x[j]*A[i][j]
for k in range(i+1,n,1):
s2 = s2 + tempx[k]*A[i][k]
x[i] = x[i]*(1-w) + w*(b[i] - s1 - s2)/A[i][i]
iterations = iterations +1
Epsilon = max(abs(x-tempx))/max(abs(x))
print("Output vector for the run no.", iterations, "is:", x)
print("Error for the run no.", iterations, "is: \t", Epsilon)
condition = Epsilon > e
return x, Epsilon, iterations
x0 = np.zeros(len(A))
x, Epsilon, iterations = GaussSiedelAccelerated(A,b,0.0001,x0, 1.1)

Replace all -1 in numpy array with values from another array

I have two numpy arrays, e. g.:
x = np.array([4, -1, 1, -1, -1, 2])
y = np.array([1, 2, 3, 4, 5, 6])
I would like to replace all -1 in x with numbers in y, that are not in x. First -1 with first number in y that is not in x (3), second -1 with second number in y that is not in x (5), ... So final x should be:
[4 3 1 5 6 2]
I created this function:
import numpy as np
import time
start = time.time()
def fill(x, y):
x_i = 0
y_i = 0
while x_i < len(x) and y_i < len(y):
if x[x_i] != -1: # If current value in x is not -1.
x_i += 1
continue
if y[y_i] in x: # If current value in y is already in x.
y_i += 1
continue
x[x_i] = y[y_i] # Replace current -1 in x by current value in y.
for i in range(10000):
x = np.array([4, -1, 1, -1, -1, 2])
y = np.array([1, 2, 3, 4, 5, 6])
fill(x, y)
end = time.time()
print(end - start) # 0.296
It's working, but I need run this function many times (e. g. milion times), so I would like to optimize it. Is there any way?
You could do:
import numpy as np
x = np.array([4, -1, 1, -1, -1, 2])
y = np.array([1, 2, 3, 4, 5, 6])
# create set of choices
sx = set(x)
choices = np.array([yi for yi in y if yi not in sx])
# assign new values
x[x == -1] = choices[:(x == -1).sum()]
print(x)
Output
[4 3 1 5 6 2]
y_not_in_x = np.setdiff1d(y, x)
x_eq_neg1 = x == -1
n_neg1s = np.sum(x_eq_neg1)
x[x_eq_neg1] = y_not_in_x[:n_neg1s]

How could I distribute obstacles to my grid without writing them manually?

I'm working on A star algorithm and as my code below shown the gird is written manually and I'm thinking to make a grid with 100* 100 size. So, it will be so awful to write them manually. I need to put my starting point at (0,0) location and my goal point at (99,99) location.
I'm trying to make the grid with this line below
grid1 = [[0 for i in range(100)]for j in range(100)]
But how could I assign obstacles to this grid randomly or not randomly without touching the location of starting point and goal point?
This is below my code:
from __future__ import print_function
import random
grid = [[0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0],#0 are free path whereas 1's are obstacles
[0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 1, 0]]
'''
heuristic = [[9, 8, 7, 6, 5, 4],
[8, 7, 6, 5, 4, 3],
[7, 6, 5, 4, 3, 2],
[6, 5, 4, 3, 2, 1],
[5, 4, 3, 2, 1, 0]]'''
init = [0, 0]
goal = [len(grid)-1, len(grid[0])-1] #all coordinates are given in format [y,x]
cost = 1
drone_h = 60
#the cost map which pushes the path closer to the goal
heuristic = [[0 for row in range(len(grid[0]))] for col in range(len(grid))]
for i in range(len(grid)):
for j in range(len(grid[0])):
heuristic[i][j] = abs(i - goal[0]) + abs(j - goal[1])
#if grid[i][j] == 1:
#heuristic[i][j] = 99 #added extra penalty in the heuristic map
print(heuristic)
elevation = [[0 for row in range(len(grid[0]))] for col in range(len(grid))]
for i in range(len(grid)):
for j in range(len(grid[0])):
if grid[i][j] == 1:
elevation[i][j] = random.randint(1,100)
else:
elevation[i][j] = 0
#the actions we can take
delta = [[-1, 0 ], # go up
[ 0, -1], # go left
[ 1, 0 ], # go down
[ 0, 1 ]] # go right
#function to search the path
def search(grid,init,goal,cost,heuristic):
closed = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]# the referrence grid
closed[init[0]][init[1]] = 1
action = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]#the action grid
x = init[0]
y = init[1]
g = 0
f = g + heuristic[init[0]][init[0]] + elevation[init[0]][init[0]]
cell = [[f, g, x, y]]
found = False # flag that is set when search is complete
resign = False # flag set if we can't find expand
while not found and not resign:
if len(cell) == 0:
resign = True
return "FAIL"
else:
cell.sort()#to choose the least costliest action so as to move closer to the goal
cell.reverse()
next = cell.pop()
x = next[2]
y = next[3]
g = next[1]
f = next[0]
if x == goal[0] and y == goal[1]:
found = True
else:
for i in range(len(delta)):#to try out different valid actions
x2 = x + delta[i][0]
y2 = y + delta[i][1]
if x2 >= 0 and x2 < len(grid) and y2 >=0 and y2 < len(grid[0]):
if closed[x2][y2] == 0 and grid[x2][y2] == 0 and elevation[x2][y2] < drone_h :
g2 = g + cost
f2 = g2 + heuristic[x2][y2] + elevation[x2][y2]
cell.append([f2, g2, x2, y2])
closed[x2][y2] = 1
action[x2][y2] = i
invpath = []
x = goal[0]
y = goal[1]
invpath.append([x, y])#we get the reverse path from here
while x != init[0] or y != init[1]:
x2 = x - delta[action[x][y]][0]
y2 = y - delta[action[x][y]][1]
x = x2
y = y2
invpath.append([x, y])
path = []
for i in range(len(invpath)):
path.append(invpath[len(invpath) - 1 - i])
print("ACTION MAP")
for i in range(len(action)):
print(action[i])
return path
a = search(grid,init,goal,cost,heuristic)
for i in range(len(a)):
print(a[i])
You could assign the grid randomly and afterwards make sure both starting and end point don't contain obstacles. For neighboring fields you could just do the same as for those two.
import random
grid1 = [[random.randint(0,1) for i in range(100)]for j in range(100)]
# clear starting and end point of potential obstacles
grid1[0][0] = 0
grid1[99][99] = 0

Categories