Replace all -1 in numpy array with values from another array - python

I have two numpy arrays, e. g.:
x = np.array([4, -1, 1, -1, -1, 2])
y = np.array([1, 2, 3, 4, 5, 6])
I would like to replace all -1 in x with numbers in y, that are not in x. First -1 with first number in y that is not in x (3), second -1 with second number in y that is not in x (5), ... So final x should be:
[4 3 1 5 6 2]
I created this function:
import numpy as np
import time
start = time.time()
def fill(x, y):
x_i = 0
y_i = 0
while x_i < len(x) and y_i < len(y):
if x[x_i] != -1: # If current value in x is not -1.
x_i += 1
continue
if y[y_i] in x: # If current value in y is already in x.
y_i += 1
continue
x[x_i] = y[y_i] # Replace current -1 in x by current value in y.
for i in range(10000):
x = np.array([4, -1, 1, -1, -1, 2])
y = np.array([1, 2, 3, 4, 5, 6])
fill(x, y)
end = time.time()
print(end - start) # 0.296
It's working, but I need run this function many times (e. g. milion times), so I would like to optimize it. Is there any way?

You could do:
import numpy as np
x = np.array([4, -1, 1, -1, -1, 2])
y = np.array([1, 2, 3, 4, 5, 6])
# create set of choices
sx = set(x)
choices = np.array([yi for yi in y if yi not in sx])
# assign new values
x[x == -1] = choices[:(x == -1).sum()]
print(x)
Output
[4 3 1 5 6 2]

y_not_in_x = np.setdiff1d(y, x)
x_eq_neg1 = x == -1
n_neg1s = np.sum(x_eq_neg1)
x[x_eq_neg1] = y_not_in_x[:n_neg1s]

Related

How do I find the x and y values in an array where the array is divided equally in a cartesian orientation?

What I'm trying to do is simply find and define an x and y coordinate for the highest number in the array.
For example, 50000 would output: x = 2, y = 2. Is there an easy way to accomplish this?
I created this code below:
data_array = [[0, 1, 2, 3, 50000],
[5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24]]
highest_num = data_array[0][0]
x = 0
y = 0
# looping from 0 to len(data_array)-1
for i in range(len(data_array)):
# looping from 0 to len(data_array[i])-1
for j in range(len(data_array[i])):
# checking data_array[x][y] is less than data_array[i][j]
if data_array[x][y] < data_array[i][j]:
# updating x and y
x = i
y = j
highest_num = data_array[i][j]
# printing the values of highest_num, x and y
print("highest_num =", highest_num)
print("x =", x)
print("y =", y)
But I would get x = 0, y = 4. I wanted to reference the middle of the array which is 12 and make the output be x = 2, y = 2.
Can this be accomplished without numpy where? I want the points to track with the max wherever it is.
Modify 2 lines in your program:
print("x =", x-2)
print("y =", y+2)
if you could use numpy do the argmax like this then find the coordinate (row col) and coordinate switch to chartersian (reverse y) then both subtracte the half , (considering it is even in both dimentions.
idx = np.argmax(data_array)
m, n = len(data_array), len(data_array[0])
r, c = m - (idx // n) - 1 , idx % n
y , x = r - (m // 2), c - (n // 2)
if not just use the code your writetn to find maximum index.

N-D indexing with defaults in NumPy

Can I index NumPy N-D array with fallback to default values for out-of-bounds indexes? Example code below for some imaginary np.get_with_default(a, indexes, default):
import numpy as np
print(np.get_with_default(
np.array([[1,2,3],[4,5,6]]), # N-D array
[(np.array([0, 0, 1, 1, 2, 2]), np.array([1, 2, 2, 3, 3, 5]))], # N-tuple of indexes along each axis
13, # Default for out-of-bounds fallback
))
should print
[2 3 6 13 13 13]
I'm looking for some built-in function for this. If such not exists then at least some short and efficient implementation to do that.
I arrived at this question because I was looking for exactly the same. I came up with the following function, which does what you ask for 2 dimension. It could likely be generalised to N dimensions.
def get_with_defaults(a, xx, yy, nodata):
# get values from a, clipping the index values to valid ranges
res = a[np.clip(yy, 0, a.shape[0] - 1), np.clip(xx, 0, a.shape[1] - 1)]
# compute a mask for both x and y, where all invalid index values are set to true
myy = np.ma.masked_outside(yy, 0, a.shape[0] - 1).mask
mxx = np.ma.masked_outside(xx, 0, a.shape[1] - 1).mask
# replace all values in res with NODATA, where either the x or y index are invalid
np.choose(myy + mxx, [res, nodata], out=res)
return res
xx and yy are the index array, a is indexed by (y,x).
This gives:
>>> a=np.zeros((3,2),dtype=int)
>>> get_with_defaults(a, (-1, 1000, 0, 1, 2), (0, -1, 0, 1, 2), -1)
array([-1, -1, 0, 0, -1])
As an alternative, the following implementation achieves the same and is more concise:
def get_with_default(a, xx, yy, nodata):
# get values from a, clipping the index values to valid ranges
res = a[np.clip(yy, 0, a.shape[0] - 1), np.clip(xx, 0, a.shape[1] - 1)]
# replace all values in res with NODATA (gets broadcasted to the result array), where
# either the x or y index are invalid
res[(yy < 0) | (yy >= a.shape[0]) | (xx < 0) | (xx >= a.shape[1])] = nodata
return res
I don't know if there is anything in NumPy to do that directly, but you can always implement it yourself. This is not particularly smart or efficient, as it requires multiple advanced indexing operations, but does what you need:
import numpy as np
def get_with_default(a, indices, default=0):
# Ensure inputs are arrays
a = np.asarray(a)
indices = tuple(np.broadcast_arrays(*indices))
if len(indices) <= 0 or len(indices) > a.ndim:
raise ValueError('invalid number of indices.')
# Make mask of indices out of bounds
mask = np.zeros(indices[0].shape, np.bool)
for ind, s in zip(indices, a.shape):
mask |= (ind < 0) | (ind >= s)
# Only do masking if necessary
n_mask = np.count_nonzero(mask)
# Shortcut for the case where all is masked
if n_mask == mask.size:
return np.full_like(a, default)
if n_mask > 0:
# Ensure index arrays are contiguous so masking works right
indices = tuple(map(np.ascontiguousarray, indices))
for ind in indices:
# Replace masked indices with zeros
ind[mask] = 0
# Get values
res = a[indices]
if n_mask > 0:
# Replace values of masked indices with default value
res[mask] = default
return res
# Test
print(get_with_default(
np.array([[1,2,3],[4,5,6]]),
(np.array([0, 0, 1, 1, 2, 2]), np.array([1, 2, 2, 3, 3, 5])),
13
))
# [ 2 3 6 13 13 13]
I also needed a solution to this, but I wanted a solution that worked in N dimensions. I made Markus' solution work for N-dimensions, including selecting from an array with more dimensions than the coordinates point to.
def get_with_defaults(arr, coords, nodata):
coords, shp = np.array(coords), np.array(arr.shape)
# Get values from arr, clipping to valid ranges
res = arr[tuple(np.clip(c, 0, s-1) for c, s in zip(coords, shp))]
# Set any output where one of the coords was out of range to nodata
res[np.any(~((0 <= coords) & (coords < shp[:len(coords), None])), axis=0)] = nodata
return res
import numpy as np
if __name__ == '__main__':
A = np.array([[1,2,3],[4,5,6]])
B = np.array([[[1, -9],[2, -8],[3, -7]],[[4, -6],[5, -5],[6, -4]]])
coords1 = [[0, 0, 1, 1, 2, 2], [1, 2, 2, 3, 3, 5]]
coords2 = [[0, 0, 1, 1, 2, 2], [1, 2, 2, 3, 3, 5], [1, 1, 1, 1, 1, 1]]
out1 = get_with_defaults(A, coords1, 13)
out2 = get_with_defaults(B, coords1, 13)
out3 = get_with_defaults(B, coords2, 13)
print(out1)
# [2, 3, 6, 13, 13, 13]
print(out2)
# [[ 2 -8]
# [ 3 -7]
# [ 6 -4]
# [13 13]
# [13 13]
# [13 13]]
print(out3)
# [-8, -7, -4, 13, 13, 13]

How could I distribute obstacles to my grid without writing them manually?

I'm working on A star algorithm and as my code below shown the gird is written manually and I'm thinking to make a grid with 100* 100 size. So, it will be so awful to write them manually. I need to put my starting point at (0,0) location and my goal point at (99,99) location.
I'm trying to make the grid with this line below
grid1 = [[0 for i in range(100)]for j in range(100)]
But how could I assign obstacles to this grid randomly or not randomly without touching the location of starting point and goal point?
This is below my code:
from __future__ import print_function
import random
grid = [[0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0],#0 are free path whereas 1's are obstacles
[0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 1, 0]]
'''
heuristic = [[9, 8, 7, 6, 5, 4],
[8, 7, 6, 5, 4, 3],
[7, 6, 5, 4, 3, 2],
[6, 5, 4, 3, 2, 1],
[5, 4, 3, 2, 1, 0]]'''
init = [0, 0]
goal = [len(grid)-1, len(grid[0])-1] #all coordinates are given in format [y,x]
cost = 1
drone_h = 60
#the cost map which pushes the path closer to the goal
heuristic = [[0 for row in range(len(grid[0]))] for col in range(len(grid))]
for i in range(len(grid)):
for j in range(len(grid[0])):
heuristic[i][j] = abs(i - goal[0]) + abs(j - goal[1])
#if grid[i][j] == 1:
#heuristic[i][j] = 99 #added extra penalty in the heuristic map
print(heuristic)
elevation = [[0 for row in range(len(grid[0]))] for col in range(len(grid))]
for i in range(len(grid)):
for j in range(len(grid[0])):
if grid[i][j] == 1:
elevation[i][j] = random.randint(1,100)
else:
elevation[i][j] = 0
#the actions we can take
delta = [[-1, 0 ], # go up
[ 0, -1], # go left
[ 1, 0 ], # go down
[ 0, 1 ]] # go right
#function to search the path
def search(grid,init,goal,cost,heuristic):
closed = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]# the referrence grid
closed[init[0]][init[1]] = 1
action = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]#the action grid
x = init[0]
y = init[1]
g = 0
f = g + heuristic[init[0]][init[0]] + elevation[init[0]][init[0]]
cell = [[f, g, x, y]]
found = False # flag that is set when search is complete
resign = False # flag set if we can't find expand
while not found and not resign:
if len(cell) == 0:
resign = True
return "FAIL"
else:
cell.sort()#to choose the least costliest action so as to move closer to the goal
cell.reverse()
next = cell.pop()
x = next[2]
y = next[3]
g = next[1]
f = next[0]
if x == goal[0] and y == goal[1]:
found = True
else:
for i in range(len(delta)):#to try out different valid actions
x2 = x + delta[i][0]
y2 = y + delta[i][1]
if x2 >= 0 and x2 < len(grid) and y2 >=0 and y2 < len(grid[0]):
if closed[x2][y2] == 0 and grid[x2][y2] == 0 and elevation[x2][y2] < drone_h :
g2 = g + cost
f2 = g2 + heuristic[x2][y2] + elevation[x2][y2]
cell.append([f2, g2, x2, y2])
closed[x2][y2] = 1
action[x2][y2] = i
invpath = []
x = goal[0]
y = goal[1]
invpath.append([x, y])#we get the reverse path from here
while x != init[0] or y != init[1]:
x2 = x - delta[action[x][y]][0]
y2 = y - delta[action[x][y]][1]
x = x2
y = y2
invpath.append([x, y])
path = []
for i in range(len(invpath)):
path.append(invpath[len(invpath) - 1 - i])
print("ACTION MAP")
for i in range(len(action)):
print(action[i])
return path
a = search(grid,init,goal,cost,heuristic)
for i in range(len(a)):
print(a[i])
You could assign the grid randomly and afterwards make sure both starting and end point don't contain obstacles. For neighboring fields you could just do the same as for those two.
import random
grid1 = [[random.randint(0,1) for i in range(100)]for j in range(100)]
# clear starting and end point of potential obstacles
grid1[0][0] = 0
grid1[99][99] = 0

How to iteratively append a dataframe representing a spatial x and y scenario using nested for loops?

I'm trying to create a dataframe to represent a topographical expression. So far I've written a pair of for loops that can individually be used to express the x and y axis, specifically in the forms,
a = []
for x in range(1,i,1):
x1 = some function of x
x2 = another function of x
a.append({'a':x, 'b':x1, 'c': x2})
xaxis = pd.DataFrame(a)
for the x axis and,
a = []
for y in range(-j, j, 1):
y1 = some function of y
a.append({'a':y,'b':y1})
yaxis = pd.DataFrame(a)
for the y axis.
That's all simple enough and works fine, however...
I want to expand on this such that the y axis loop is repeated with each iteration of the x axis loop and have the y1 function depend on the parameters of the x axis loop. I get this far,
a = []
for x in range(1,i,1):
x1 = some function of x
x2 = another function of x
for y in range(-j, j, 1):
y1 = some function of y that calls x2
a.append({
and I'm stumped.
The output I'm after is essentially this,
x x1 x2 y y1
x1(1) x2(1) -j y1(1,-j)
1 x1(1) x2(1) 0 y1(1,0)
x1(1) x2(1) j y1(1,j)
x1(2) x2(2) -j y1(2,-j)
2 x1(2) x2(2) 0 y1(2,0)
x1(2) x2(2) j y1(2,j)
....
and so on to x = i.
The end desire is to have data that can be plotted in a 2D histogram
If there's a better way to do this then please do let me know, this is just the only way I can currently think of that may get the result I'm after.
edit: Turns out this can be done quite effectively using numpy arrays. This is a general expression on how I achieved this goal in the end,
y1 = lambda x,y: f(x,y)
np.array( [ [ y1(x,y) for x in xrange(1,i,1)] for y in xrange(-j,j,1)] )
You have to find a vectorized version of your functions. This can be achieved (for intstant) by using numpy's several vectorized functions or by using numpy.vectorized(). Take a look on that example :
import numpy as np
def f1(x):
return x**2
def f2(x):
return np.abs(x)
def f3(x,y):
return x**2 + y**2
i = 3 ; j = 2
x = np.arange(1,i,1)
y = np.arange(-j,j,1)
# Now build cartesian product of x and y
xy = np.array([np.tile(x, len(y)), np.repeat(y, len(x))])
xy
array([[ 1, 2, 1, 2, 1, 2, 1, 2],
[-2, -2, -1, -1, 0, 0, 1, 1]])
x1 = f1(xy[0,])
x1
array([1, 4, 1, 4, 1, 4, 1, 4], dtype=int32)
x2 = f2(xy[0,])
x2
array([1, 2, 1, 2, 1, 2, 1, 2])
y1 = f3(xy[0,], xy[1,])
y1
array([5, 8, 2, 5, 1, 4, 2, 5], dtype=int32)

Summing and removing repeated elements of Numpy Arrays

I have 4 1D Numpy arrays of equal length.
The first three act as an ID, uniquely identifying the 4th array.
The ID arrays contain repeated combinations, for which I need to sum the 4th array, and remove the repeating element from all 4 arrays.
x = np.array([1, 2, 4, 1])
y = np.array([1, 1, 4, 1])
z = np.array([1, 2, 2, 1])
data = np.array([4, 7, 3, 2])
In this case I need:
x = [1, 2, 4]
y = [1, 1, 4]
z = [1, 2, 2]
data = [6, 7, 3]
The arrays are rather long so loops really won't work. I'm sure there is a fairly simple way to do this, but for the life of me I can't figure it out.
To get started, we can stack the ID vectors into a matrix such that each ID is a row of three values:
XYZ = np.vstack((x,y,z)).T
Now, we just need to find the indices of repeated rows. Unfortunately, np.unique doesn't operate on rows, so we need to do some tricks:
order = np.lexsort(XYZ.T)
diff = np.diff(XYZ[order], axis=0)
uniq_mask = np.append(True, (diff != 0).any(axis=1))
This part is borrowed from the np.unique source code, and finds the unique indices as well as the "inverse index" mapping:
uniq_inds = order[uniq_mask]
inv_idx = np.zeros_like(order)
inv_idx[order] = np.cumsum(uniq_mask) - 1
Finally, sum over the unique indices:
data = np.bincount(inv_idx, weights=data)
x,y,z = XYZ[uniq_inds].T
You can use unique and sum as reptilicus suggested to do the following
from itertools import izip
import numpy as np
x = np.array([1, 2, 4, 1])
y = np.array([1, 1, 4, 1])
z = np.array([1, 2, 2, 1])
data = np.array([4, 7, 3, 2])
# N = len(x)
# ids = x + y*N + z*(N**2)
ids = np.array([hash((a, b, c)) for a, b, c in izip(x, y, z)]) # creates flat ids
_, idx, idx_rep = np.unique(ids, return_index=True, return_inverse=True)
x_out = x[idx]
y_out = y[idx]
z_out = z[idx]
# data_out = np.array([np.sum(data[idx_rep == i]) for i in idx])
data_out = np.bincount(idx_rep, weights=data)
print x_out
print y_out
print z_out
print data_out

Categories