Selecting values based on threshold using Python

Selecting values based on threshold using Python - python

The present code selects minimum values by scanning the adjoining elements in the same and the succeeding row. However, I want the code to select all the values if they are less than the threshold value. For example, in row 2, I want the code to pick both 0.86 and 0.88 since both are less than 0.9, and not merely minimum amongst 0.86,0.88. Basically, the code should pick up the minimum value if all the adjoining elements are greater than the threshold. If that's not the case, it should pick all the values less than the threshold.
import numpy as np
import numba as nb
Pe = np.random.rand(5,5)
def minValues(arr):
n, m = arr.shape
assert n >= 1 and m >= 2
res = []
i, j = 0, np.argmin(arr[0,:])
res.append((i, j))
iPrev = jPrev = -1
while iPrev < n-1:
cases = [(i, j-1), (i, j+1), (i+1, j)]
minVal = np.inf
iMin = jMin = -1
# Find the best candidate (smallest value)
for (i2, j2) in cases:
if i2 == iPrev and j2 == jPrev: # No cycles
continue
if i2 < 0 or i2 >= n or j2 < 0 or j2 >= m: # No out-of-bounds
continue
if arr[i2, j2] < minVal:
iMin, jMin = i2, j2
minVal = arr[i2, j2]
assert not np.isinf(minVal)
# Store it and update the values
res.append((iMin, jMin))
iPrev, jPrev = i, j
i, j = iMin, jMin
return np.array(res)
T=minValues(Pe)
Path=Pe[T.T[0], T.T[1]]
Current output:
Desired output:

Try this:
def minValues(arr):
n, m = arr.shape
assert n >= 1 and m >= 2
res = []
i, j = 0, np.argmin(arr[0,:])
print(f"Values i, j are: {i}, {j}")
res.append((i, j))
iPrev = jPrev = -1
while iPrev < n-1:
lowerVals = []
print(f"Values iPrev, jPrev are: {iPrev}, {jPrev}")
cases = [(i, j-1), (i, j+1), (i+1, j)]
print(f"Posible cases are: {cases}")
minVal = np.inf
print(f"MinVal is: {minVal}")
iMin = jMin = -1
# Find the best candidate (smallest value)
for (i2, j2) in cases:
if i2 == iPrev and j2 == jPrev: # No cycles
continue
if i2 < 0 or i2 >= n or j2 < 0 or j2 >= m: # No out-of-bounds
continue
if arr[i2, j2] < arr[i, j]:
lowerVals.append((i2, j2))
if arr[i2, j2] < minVal:
iMin, jMin = i2, j2
minVal = arr[i2, j2]
if not lowerVals: lowerVals.append((iMin, jMin))
print(f"Values iMin, jMin are: {iMin}, {jMin}")
print(f"MinVal is: {minVal}")
print(f"Lower values are: {lowerVals}")
assert not np.isinf(minVal)
# Store it and update the values
res += lowerVals
print(f"Final res after current iteration: {res}\n")
iPrev, jPrev = i, j
i, j = iMin, jMin
return np.array(res)
I used the prints for debugging, but I just check in each iteration all the values which are lower than the current value and add them to path at the end of the iteration.
Edit: introducing the extra line of my comment you get the above code, this should work.

This is an interesting problem - looking at hydrology flow/downhill direction?
The issue with the description is that you don't specify which of the neighboring cells to iterate to next, meaning, if you have three neighboring cells that are lower, they all get added to the output array but which is selected? Also, when looking back through the output array how do you know which ones got added with respect to a single neighboring cell? I went about this by selecting the lowest value of the neighboring cells with less than the source cell's value (biggest dropoff), then continued the search from that cell and so on. For each iteration, there is a new nested list of the cell positions for the neighboring cells that are lower. This could readily be updated any number of ways, to include exporting an additional list showing which cells were used for search (essentially the path taken, while there might be highlighted cells (lower) that were not visited per se.
import numpy as np
Pe = np.random.rand(5,5)
def nearestMin(arr, output_arr=[[0,0]], n=0, m=0):
if m==arr.shape[1]:
return output_arr
lower_vals = np.argwhere(arr < arr[n,m])
if lower_vals.size == 0: #No values anywhere less than current cell
return output_arr
#Get offset from current point, as tuple of x and y distance
smaller_adjacents = np.where(Pe < Pe[n, m])
if smaller_adjacents[0].size == 0: #No lower points anywhere
return output_arr
ind = np.where((abs(n - smaller_adjacents[0]) <= 1 ) & \
(abs(m - smaller_adjacents[1]) <= 1))[0]
if ind.size == 0: #No lower points neighboring
return output_arr
xs, ys = smaller_adjacents[0][ind], smaller_adjacents[1][ind]
lower_neighbors = np.vstack((xs, ys)).T.tolist()
output_arr.append(lower_neighbors) #Unbracket to have final output array be flat (2d)
n_depth = [Pe[x,y] for x,y in lower_neighbors] - Pe[n,m]
biggest_drop = np.where(n_depth==n_depth.min())[0][0]
lowest_neighbor = lower_neighbors[biggest_drop]
return nearestMin(arr, output_arr, n=lowest_neighbor[0], m=lowest_neighbor[1])
nearestMin(Pe)
If you want the path, the same code can be modified as shown below to return that as a second array. In this example, the values for the path are listed. Each value corresponds to the "list of lists" item in the first returned array of neighboring lower values. Again, not sure exactly what you're going for.
import numpy as np
Pe = np.random.rand(5,5)
def nearestMin(arr, output_arr=[[0,0]], path=[], n=0, m=0):
#Initialize path variable properly
if output_arr == [[0,0]]:
path.append(arr[0,0])
if m==arr.shape[1]:
return (output_arr, path)
lower_vals = np.argwhere(arr < arr[n,m])
if lower_vals.size == 0: #No values anywhere less than current cell
return (output_arr, path)
#Get offset from current point, as tuple of x and y distance
smaller_adjacents = np.where(Pe < Pe[n, m])
if smaller_adjacents[0].size == 0: #No lower points anywhere
return (output_arr, path)
ind = np.where((abs(n - smaller_adjacents[0]) <= 1 ) & \
(abs(m - smaller_adjacents[1]) <= 1))[0]
if ind.size == 0: #No lower points neighboring
return (output_arr, path)
xs, ys = smaller_adjacents[0][ind], smaller_adjacents[1][ind]
lower_neighbors = np.vstack((xs, ys)).T.tolist()
output_arr.append(lower_neighbors) #Unbracket to have final output array be flat (2d)
n_depth = [Pe[x,y] for x,y in lower_neighbors] - Pe[n,m]
biggest_drop = np.where(n_depth==n_depth.min())[0][0]
lowest_neighbor = lower_neighbors[biggest_drop]
path.append(arr[lowest_neighbor[0], lowest_neighbor[1]])
return nearestMin(arr, output_arr, path, n=lowest_neighbor[0], m=lowest_neighbor[1])
nearestMin(Pe)

Related

python find biggest sequence of zeros in list of lists (recursion)

I need to find the biggest sequence of zeros next to each other (up down left right).
for example in this example the function should return 6
mat = [[1,**0**,**0**,3,0],
[**0**,**0**,2,3,0],
[2,**0**,**0**,2,0],
[0,1,2,3,3],]
the zeros that i marked as bold should be the answer (6)
the solution should be implemented without any loop (using recursion)
this is what i tried so far
def question_3_b(some_list,index_cord):
y = index_cord[0]
x = index_cord[1]
list_of_nums = []
def main(some_list,index_cord):
y = index_cord[0]
x = index_cord[1]
def check_right(x,y):
if x + 1 < 0:
return 0
if some_list[y][x+1] == 0:
main(some_list,(y,x+1))
else:
return 0
def check_left(x,y):
if x -1 < 0:
return 0
if some_list[y][x - 1] == 0:
main(some_list,(y, x - 1))
def check_down(x,y):
if y + 1 < 0:
return 0
try:
if some_list[y + 1][x] == 0:
main(some_list,(y + 1, x))
except:
print("out of range")
def check_up(x,y):
counter_up = 0
if y - 1 < 0:
return 0
if some_list[y - 1][x] == 0:
counter_up += 1
main(some_list,(y - 1, x))
list_of_nums.append((x,y))
right = check_right(x,y)
down = check_down(x,y)
left = check_left(x,y)
up = check_up(x, y)
main(some_list,index_cord)
print(list_of_nums)
question_3_b(mat,(0,1))

Solution #1: classic BFS
As I mention in a comment, you can tackle this problem using BFS (Breadth First Search), it will be something like this:
# This function will give the valid adjacent positions
# of a given position according the matrix size (NxM)
def valid_adj(i, j, N, M):
adjs = [[i + 1, j], [i - 1, j], [i, j + 1], [i, j - 1]]
for a_i, a_j in adjs:
if 0 <= a_i < N and 0 <= a_j < M:
yield a_i, a_j
def biggest_zero_chunk(mat):
answer = 0
N, M = len(mat), len(mat[0])
# Mark all non zero position as visited (we are not instrested in them)
mask = [[mat[i][j] != 0 for j in range(M)] for i in range(N)]
queue = []
for i in range(N):
for j in range(M):
if mask[i][j]: # You have visited this position
continue
# Here comes the BFS
# It visits all the adjacent zeros recursively,
# count them and mark them as visited
current_ans = 1
queue = [[i,j]]
while queue:
pos_i, pos_j = queue.pop(0)
mask[pos_i][pos_j] = True
for a_i, a_j in valid_adj(pos_i, pos_j, N, M):
if mat[a_i][a_j] == 0 and not mask[a_i][a_j]:
queue.append([a_i, a_j])
current_ans += 1
answer = max(answer, current_ans)
return answer
mat = [[1,0,0,3,0],
[0,0,2,3,0],
[2,0,0,2,0],
[0,1,2,3,3],]
mat2 = [[1,0,0,3,0],
[0,0,2,3,0],
[2,0,0,0,0], # A slight modification in this row to connect two chunks
[0,1,2,3,3],]
print(biggest_zero_chunk(mat))
print(biggest_zero_chunk(mat2))
Output:
6
10
Solution #2: using only recursion (no for statements)
def count_zeros(mat, i, j, N, M):
# Base case
# Don't search zero chunks if invalid position or non zero values
if i < 0 or i >= N or j < 0 or j >= M or mat[i][j] != 0:
return 0
ans = 1 # To count the current zero we start at 1
mat[i][j] = 1 # To erase the current zero and don't count it again
ans += count_zeros(mat, i - 1, j, N, M) # Up
ans += count_zeros(mat, i + 1, j, N, M) # Down
ans += count_zeros(mat, i, j - 1, N, M) # Left
ans += count_zeros(mat, i, j + 1, N, M) # Right
return ans
def biggest_zero_chunk(mat, i = 0, j = 0, current_ans = 0):
N, M = len(mat), len(mat[0])
# Base case (last position of mat)
if i == N - 1 and j == M - 1:
return current_ans
next_j = (j + 1) % M # Move to next column, 0 if j is the last one
next_i = i + 1 if next_j == 0 else i # Move to next row if j is 0
ans = count_zeros(mat, i, j, N, M) # Count zeros from this position
current_ans = max(ans, current_ans) # Update the current answer
return biggest_zero_chunk(mat, next_i, next_j, current_ans) # Check the rest of mat
mat = [[1,0,0,3,0],
[0,0,2,3,0],
[2,0,0,2,0],
[0,1,2,3,3],]
mat2 = [[1,0,0,3,0],
[0,0,2,3,0],
[2,0,0,0,0], # A slight modification in this row to connect two chunks
[0,1,2,3,3],]
print(biggest_zero_chunk(mat.copy()))
print(biggest_zero_chunk(mat2.copy()))
Output:
6
10
Notes:
The idea behind this solution is still BFS (represented mainly in the count_zeros function). Also, if you are interested in using the matrix values after this you should call the biggest_zero_chunk with a copy of the matrix (because it is modified in the algorithm)

Absolute value of a difference in gurobi with abs_() not working

I've got a problem with a gurobi program which is supposed to find a certain number of distinct shortest paths in a graph with a length not exceeding maxLength by using an LP. For making sure that the different paths are distinct I tried to sum up the number of arcs where one path is different from another. y[a,i,j] should be one if paths i and j are different in arc a and zero otherwise.
I tried to achieve that by taking the difference between x[a,i] and x[a,j] at every arc and then expect the sum over all arcs for every combination of i and j to be greater one. Everything above that are just constraints for a regular min cost flow. Somehow my problem is infeasible for any of the test instances if I want more than 1 path. Any ideas? Thanks in advance.
def findXShortestPaths(V, A, pred, succ, start, end, cost, amount, maxLength, origin, destination):
model = Model("Shortest Path")
I = range(amount)
x = model.addVars(A, I, vtype = GRB.BINARY, name = "x")
y = model.addVars(A, I, I, vtype = GRB.INTEGER, name = "y")
z = model.addVars(A,I,I,vtype=GRB.BINARY,name="z")
model.setObjective(quicksum(cost[a] * x[a, i] for a in A for i in I), GRB.MINIMIZE)
model.addConstrs(quicksum(x[a,i] for a in pred[v]) - quicksum(x[a,i] for a in succ[v]) == 0 for i in I for v in V if v != origin and v != destination)
model.addConstrs(quicksum(x[a,i] for a in succ[origin]) == 1 for i in I)
model.addConstrs(quicksum(x[a,i] for a in pred[destination]) == 1 for i in I)
model.addConstrs(x[a,i] + x[b,i] <= 1 for i in I for a in A for b in A if end[a] == start[b] and end[b] == start[a])
model.addConstrs(y[a,i,j]==x[a,i]-x[a,j] for a in A for i in I for j in I)
model.addConstrs(z[a,i,j]== abs_(y[a,i,j]) for a in A for i in I for j in I)
model.addConstrs(quicksum(z[a,i,j] for a in A) >= 1 for i in I for j in I if i != j)
model.addConstrs(quicksum(x[a,i]*cost[a] for a in A) <= maxLength for i in I)
model.optimize()

Finding first pair of numbers in array that sum to value

Im trying to solve the following Codewars problem: https://www.codewars.com/kata/sum-of-pairs/train/python
Here is my current implementation in Python:
def sum_pairs(ints, s):
right = float("inf")
n = len(ints)
m = {}
dup = {}
for i, x in enumerate(ints):
if x not in m.keys():
m[x] = i # Track first index of x using hash map.
elif x in m.keys() and x not in dup.keys():
dup[x] = i
for x in m.keys():
if s - x in m.keys():
if x == s-x and x in dup.keys():
j = m[x]
k = dup[x]
else:
j = m[x]
k = m[s-x]
comp = max(j,k)
if comp < right and j!= k:
right = comp
if right > n:
return None
return [s - ints[right],ints[right]]
The code seems to produce correct results, however the input can consist of array with up to 10 000 000 elements, so the execution times out for large inputs. I need help with optimizing/modifying the code so that it can handle sufficiently large arrays.

Your code inefficient for large list test cases so it gives timeout error. Instead you can do:
def sum_pairs(lst, s):
seen = set()
for item in lst:
if s - item in seen:
return [s - item, item]
seen.add(item)
We put the values in seen until we find a value that produces the specified sum with one of the seen values.
For more information go: Referance link

Maybe this code:
def sum_pairs(lst, s):
c = 0
while c<len(lst)-1:
if c != len(lst)-1:
x= lst[c]
spam = c+1
while spam < len(lst):
nxt= lst[spam]
if nxt + x== s:
return [x, nxt]
spam += 1
else:
return None
c +=1
lst = [5, 6, 5, 8]
s = 14
print(sum_pairs(lst, s))
Output:
[6, 8]

This answer unfortunately still times out, even though it's supposed to run in O(n^3) (since it is dominated by the sort, the rest of the algorithm running in O(n)). I'm not sure how you can obtain better than this complexity, but I thought I might put this idea out there.
def sum_pairs(ints, s):
ints_with_idx = enumerate(ints)
# Sort the array of ints
ints_with_idx = sorted(ints_with_idx, key = lambda (idx, num) : num)
diff = 1000000
l = 0
r = len(ints) - 1
# Indexes of the sum operands in sorted array
lSum = 0
rSum = 0
while l < r:
# Compute the absolute difference between the current sum and the desired sum
sum = ints_with_idx[l][1] + ints_with_idx[r][1]
absDiff = abs(sum - s)
if absDiff < diff:
# Update the best difference
lSum = l
rSum = r
diff = absDiff
elif sum > s:
# Decrease the large value
r -= 1
else:
# Test to see if the indexes are better (more to the left) for the same difference
if absDiff == diff:
rightmostIdx = max(ints_with_idx[l][0], ints_with_idx[r][0])
if rightmostIdx < max(ints_with_idx[lSum][0], ints_with_idx[rSum][0]):
lSum = l
rSum = r
# Increase the small value
l += 1
# Retrieve indexes of sum operands
aSumIdx = ints_with_idx[lSum][0]
bSumIdx = ints_with_idx[rSum][0]
# Retrieve values of operands for sum in correct order
aSum = ints[min(aSumIdx, bSumIdx)]
bSum = ints[max(aSumIdx, bSumIdx)]
if aSum + bSum == s:
return [aSum, bSum]
else:
return None

Python: IndexError: index 7 is out of bounds for axis 1 with size 7

I'm trying to loop over a matrix and replacing the entries using operations involving the matrices around it:
For example,
xarray = np.array([3,35,7,9,8,7,6])
yarray = np.array([2,5,1,7,3,59,2])
zarray = np.array([3,5,6,3.5,69,2,1])
barray = np.array([1,5,56,7,24,2,1])
carray = np.array([1,5,56,7,24,2,1])
darray = np.array([1,5,56,7,24,2,1])
earray = np.array([1,5,56,7,24,2,1])
Q = np.array([xarray,yarray,zarray,barray,carray,darray,earray])
k = np.shape(Q)
for i in range(k[0]):
for j in range(k[1]):
$taking into account entries on the corners which don't have the same amount of surrounding matrices
if i==0 or j==0 or i == k[0] or j == k[1]:
Q[i,j] = Q[i,j]
else:
Q[i,j] = (Q[i,j]+Q[i-1,j]+Q[i+1,j]+Q[i,j+1]+Q[i-1,j-1]+Q[i-1,j+1])*3
print Q
But I get the error above. Any help would be appreciated.

Remember that in python indexes start from 0, which means if i or j is equal to the length of the array, you'll get an error
All I've done here is switched range(k[x]) for range(k[x] - 1)
for i in range(k[0] - 1):
for j in range(k[1] - 1):
if i==0 or j==0:
Q[i,j] = Q[i,j]
else:
Q[i,j] = (Q[i,j]+Q[i-1,j]+Q[i+1,j]+Q[i,j+1]+Q[i-1,j-1]+Q[i-1,j+1])*3

Merge Sort Index Error

def merge (seq, p, q, r):
# n1: length of sub-array [p..q]
n1 = q - p + 1
# n2: length of sub-array [q+1 ..r]
n2 = r - q
# Left and Right arrays
left_arr = []
right_arr = []
for i in xrange(0, n1):
left_arr.append( seq[p+i] )
for j in xrange(0, n2):
right_arr.append( seq[q+j+1] )
j=0
i=0
for k in xrange(p,r+1):
if left_arr[i]<= right_arr[j]:
seq[k]=left_arr[i]
i+=1
else:
seq[k]=right_arr[j]
j+=1
return seq
s = [2,4,5,7,1,2,3,6]
p = 0
q = 3
r = 7
print merge(s,p,q,r)
How it works:
A unsorted sequence s is taken, along with the index numbers where the sequence has to be merged. (p=initial, r = final, q=middle)
Now, left_arr and right_arr are [p,q], [q+1, r] respectively
we take left_arr and right_arr initial values (i=0, j=0). We iterate over the sequence seq...
while iterating we are replacing the values of seq with the sorted values...
The above function is able to sort till last number "7".. at the end, its showing "IndexError". I can use exception handling to catch it and fix but I think... for merge sort, its too much.. Can anyone help me in fixing the code as easy as possible.
I am learning Algorithms.. (following book: Introduction to Algorithms by Thomas H. Cormen)

the problem is that at the last iteration i will be equal to 4 and you are trying to access left_arr[5] which does not exist. you should add a stopping condition when i or j become larger then the size of the corresponding array, and then add all the remaining elements in the other array to seq.
Here is a code that works:
def merge (seq, p, q, r):
# n1: length of sub-array [p..q]
n1 = q - p + 1
# n2: length of sub-array [q+1 ..r]
n2 = r - q
# Left and Right arrays
left_arr = seq[p:n1] #here
right_arr = seq[n1:r+1] #here
j=0
i=0
for k in xrange(p, r+1):
if left_arr[i]<= right_arr[j]:
seq[k]=left_arr[i]
i+=1
if i > n1-1: #here
break
else:
seq[k]=right_arr[j] #here
j+=1
if j > n2-1:
break
if i >= len(left_arr): #from here down
seq[k+1:] = right_arr[j:]
elif j >= len(right_arr):
seq[k+1:] = left_arr[i:]
return seq
s = [2,4,5,7,1,1,1,1]
p = 0
q = 3
r = 7
print merge(s,p,q,r)
I have marked with comments the places where I have edited your code.

The problem with your code is that you're looping over xrange(p, r+1), so during that loop in some iteration the value of either i or j might become equal the value of len(left) or len(right), causing the IndexError eventually.
def merge(seq,p,q,r):
left=seq[p:q+1] #a better way to fetch the list
right=seq[q+1:]
i=0
j=0
#you shuldn't loop over the length of seq as that might make the value of either i or j
# greater than the length of left or right lists respectively.
# so you should only loop until one of the lists is fully exhausted
while i<len(left) and j<len(right):
if left[i] <= right[j] :
seq[i+j]=left[i]
i+=1
else:
seq[i+j]=right[j]
j+=1
#now the while loop is over so either right or left is fully traversed, which can be
#find out my matching the value of j and i with lenghts of right and left lists respectively
if j == len(right):
seq[i+j:]=left[i:] #if right is fully traversed then paste the remaining left list into seq
elif i==len(left): #this is just vice-versa of the above step
seq[i+j:]=right[j:]
print seq
s = [2,4,5,7,1,2,3,6]
p = 0
q = 3
r = 7
merge(s,p,q,r)
output:
[1, 2, 2, 3, 4, 5, 6, 7]

You didn't check the array index while iterating left_arr and right_arr.
You should merge the left part of either array when another array ends.
for k in xrange(p,r+1):
if left_arr[i]<= right_arr[j]:
seq[k]=left_arr[i]
i+=1
else:
seq[k]=right_arr[j]
j+=1
# --------------- add this ----------------
# if any array ends, merge the left elements of the other array
if i >= len(left_arr):
seq[k:] = right_arr[j:]
break
elif j >= len(right_arr):
seq[k:] = left_arr[i:]
break
# --------------- end ----------------
return seq

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Selecting values based on threshold using Python - python

Related

python find biggest sequence of zeros in list of lists (recursion)

Absolute value of a difference in gurobi with abs_() not working

Finding first pair of numbers in array that sum to value

Python: IndexError: index 7 is out of bounds for axis 1 with size 7

Merge Sort Index Error

Categories

Resources