Series calculation based on shifted values / recursive algorithm - python

I have the following:
df['PositionLong'] = 0
df['PositionLong'] = np.where(df['Alpha'] == 1, 1, (np.where(np.logical_and(df['PositionLong'].shift(1) == 1, df['Bravo'] == 1), 1, 0)))
This lines basically only take in df['Alpha'] but not the df['PositionLong'].shift(1).. It cannot recognize it but I dont understand why?
It produces this:
df['Alpha'] df['Bravo'] df['PositionLong']
0 0 0
1 1 1
0 1 0
1 1 1
1 1 1
However what I wanted the code to do is this:
df['Alpha'] df['Bravo'] df['PositionLong']
0 0 0
1 1 1
0 1 1
1 1 1
1 1 1
I believe the solution is to loop each row, but this will take very long.
Can you help me please?

You are looking for a recursive function, since a previous PositionLong value depends on Alpha, which itself is used to determine PositionLong.
But numpy.where is a regular function, so df['PositionLong'].shift(1) is evaluated as a series of 0 values, since you initialise the series with 0.
A manual loop need not be expensive. You can use numba to efficiently implement your recursive algorithm:
from numba import njit
#njit
def rec_algo(alpha, bravo):
res = np.empty(alpha.shape)
res[0] = 1 if alpha[0] == 1 else 0
for i in range(1, len(res)):
if (alpha[i] == 1) or ((res[i-1] == 1) and bravo[i] == 1):
res[i] = 1
else:
res[i] = 0
return res
df['PositionLong'] = rec_algo(df['Alpha'].values, df['Bravo'].values).astype(int)
Result:
print(df)
Alpha Bravo PositionLong
0 0 0 0
1 1 1 1
2 0 1 1
3 1 1 1
4 1 1 1

Related

Mimicing 'n' Nested For Loops Using Recursion

I would like to create some number of for-loops equal to the length of a list, and iterate through the values in that list. For example, if I had the list:
[1,2,3,4]
I would like the code to function like:
for i in range(1):
for j in range(2):
for k in range(3):
for l in range(4):
myfunc(inputs)
I understand I would need to do this recursively, but I'm not quite sure how. Ideally, I would even be able to iterate through these list values by a variable step; perhaps I want to count by two's for one loop, by .8's for another, etc. In that case, I would probably deliver the information in a format like this:
[[value,step],[value,step] ... [value,step],[value,step]]
So, how could I do this?
Not quite sure what you want at the very end, but here's a way to recursively set-up your loops:
test = [1,2,3,4]
def recursive_loop(test):
if len(test) == 1:
for i in range(test[0]):
print('hi') # Do whatever you want here
elif len(test) > 1:
for i in range(test[0]):
recursive_loop(test[1:])
recursive_loop(test)
You can certainly do it with recursion, but there's already a library function for that:
itertools.product
from itertools import product
def nested_loops(myfunc, l):
for t in product(*(range(n) for n in l)):
myfunc(*t)
## OR EQUIVALENTLY
# def nested_loops(myfunc, l):
# for t in product(*map(range, l)):
# myfunc(l)
nested_loops(print, [1, 2, 3, 4])
# 0 0 0 0
# 0 0 0 1
# 0 0 0 2
# 0 0 0 3
# 0 0 1 0
# 0 0 1 1
# ...
# 0 1 2 1
# 0 1 2 2
# 0 1 2 3
You can of course include steps too. Library function zip can be useful.
def nested_loops_with_steps_v1(myfunc, upperbounds, steps):
for t in product(*(range(0, n, s) for n,s in zip(upperbounds, steps))):
myfunc(*t)
nested_loops_with_steps_v1(print, [1,2,8,10], [1,1,4,5])
# 0 0 0 0
# 0 0 0 5
# 0 0 4 0
# 0 0 4 5
# 0 1 0 0
# 0 1 0 5
# 0 1 4 0
# 0 1 4 5
Or if your steps and upperbounds are already zipped together:
def nested_loops_with_steps_v2(myfunc, l):
for t in product(*(range(0, n, s) for n,s in l)):
myfunc(*t)
nested_loops_with_steps_v2(print, [(1,1),(2,1),(8,4),(10,5)])
# 0 0 0 0
# 0 0 0 5
# 0 0 4 0
# 0 0 4 5
# 0 1 0 0
# 0 1 0 5
# 0 1 4 0
# 0 1 4 5

how to change string matrix to a integer matrix

I have a voting dataset like that:
republican,n,y,n,y,y,y,n,n,n,y,?,y,y,y,n,y
republican,n,y,n,y,y,y,n,n,n,n,n,y,y,y,n,?
democrat,?,y,y,?,y,y,n,n,n,n,y,n,y,y,n,n
democrat,n,y,y,n,?,y,n,n,n,n,y,n,y,n,n,y
but they are both string so I want to change them to integer matrix and make statistic
hou_dat = pd.read_csv("house.data", header=None)
for i in range (0, hou_dat.shape[0]):
for j in range (0, hou_dat.shape[1]):
if hou_dat[i, j] == "republican":
hou_dat[i, j] = 2
if hou_dat[i, j] == "democrat":
hou_dat[i, j] = 3
if hou_dat[i, j] == "y":
hou_dat[i, j] = 1
if hou_dat[i, j] == "n":
hou_dat[i, j] = 0
if hou_dat[i, j] == "?":
hou_dat[i, j] = -1
hou_sta = hou_dat.apply(pd.value_counts)
print(hou_sta)
however, it shows error, how to solve it?:
Exception has occurred: KeyError
(0, 0)
IIUC, you need map and stack
map_dict = {'republican' : 2,
'democrat' : 3,
'y' : 1,
'n' : 0,
'?' : -1}
df1 = df.stack().map(map_dict).unstack()
print(df1)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
0 2 0 1 0 1 1 1 0 0 0 1 -1 1 1 1 0 1
1 2 0 1 0 1 1 1 0 0 0 0 0 1 1 1 0 -1
2 3 -1 1 1 -1 1 1 0 0 0 0 1 0 1 1 0 0
3 3 0 1 1 0 -1 1 0 0 0 0 1 0 1 0 0 1
If you're dealing with data from csv, it is better to use pandas' methods.
In this case, you have replace method to do exactly what you asked for.
hou_dat.replace(to_replace={'republican':2, 'democrat':3, 'y':1, 'n':0, '?':-1}, inplace=True)
You can read more about it in this documentation

Python numpy zeros array being assigned 1 for every value when only one index is updated

The following is my code:
amount_features = X.shape[1]
best_features = np.zeros((amount_features,), dtype=int)
best_accuracy = 0
best_accuracy_index = 0
def find_best_features(best_features, best_accuracy):
for i in range(amount_features):
trial_features = best_features
trial_features[i] = 1
svc = SVC(C = 10, gamma = .1)
svc.fit(X_train[:,trial_features==1],y_train)
y_pred = svc.predict(X_test[:,trial_features==1])
accuracy = metrics.accuracy_score(y_test,y_pred)
if (accuracy > best_accuracy):
best_accuracy = accuracy
best_accuracy_index = i
print(best_accuracy_index)
best_features[best_accuracy_index] = 1
return best_features, best_accuracy
bf, ba = find_best_features(best_features, best_accuracy)
print(bf, ba)
And this is my output:
25
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 0.865853658537
And my expected output:
25
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0] 0.865853658537
I am trying to update the zeros array with the index that gives the highest accuracy. As you see it should be index 25, and I follow that by assigning the 25 index for my array equal to 1. However, when I print the array it shows every index has been updated to 1.
Not sure what is the mishap. Thanks for spending your limited time on Earth to help me.
Change trial_features = best_features to trial_features = numpy.copy(best_features). Reasoning behind the change is already given by #Michael Butscher.

How do I add to a grid coordinate in python?

What I'm trying to do is have a 2D array and for every coordinate in the array, ask all the other 8 coordinates around it if they have stored a 1 or a 0. Similar to a minesweeper looking for mines.
I used to have this:
grid = []
for fila in range(10):
grid.append([])
for columna in range(10):
grid[fila].append(0)
#edited
for fila in range (10):
for columna in range (10):
neighbour = 0
for i in range 10:
for j in range 10:
if gird[fila + i][columna + j] == 1
neighbour += 1
But something didn't work well. I also had print statments to try to find the error that way but i still didnt understand why it only made half of the for loop. So I changed the second for loop to this:
#edited
for fila in range (10):
for columna in range (10):
neighbour = 0
if grid[fila - 1][columna - 1] == 1:
neighbour += 1
if grid[fila - 1][columna] == 1:
neighbour += 1
if grid[fila - 1][columna + 1] == 1:
neighbour += 1
if grid[fila][columna - 1] == 1:
neighbour += 1
if grid[fila][columna + 1] == 1:
neighbour += 1
if grid[fila + 1][columna - 1] == 1:
neighbour += 1
if grid[fila + 1][columna] == 1:
neighbour += 1
if grid[fila + 1][columna + 1] == 1:
neighbour += 1
And got this error:
if grid[fila - 1][columna + 1] == 1:
IndexError: list index out of range
It seems like I can't add on the grid coordinates but I can subtract. Why is that?
Valid indices in python are -len(grid) to len(grid)-1. the positive indices are accessing elements with offset from the front, the negative ones from the rear. adding gives a range error if the index is greater than len(grid)-1 that is what you see. subtracting does not give you a range error unless you get an index value less than -len(grid). although you do not check for the lower bound, which is 0 (zero) it seems to work for you as small negative indices return you values from the rear end. this is a silent error leading to wrong neighborhood results.
If you are computing offsets, you need to make sure your offsets are within the bounds of the lists you have. So if you have 10 elements, don't try to access the 11th element.
import collections
grid_offset = collections.namedtuple('grid_offset', 'dr dc')
Grid = [[0 for c in range(10)] for r in range(10)]
Grid_height = len(Grid)
Grid_width = len(Grid[0])
Neighbors = [
grid_offset(dr, dc)
for dr in range(-1, 2)
for dc in range(-1, 2)
if not dr == dc == 0
]
def count_neighbors(row, col):
count = 0
for nb in Neighbors:
r = row + nb.dr
c = col + nb.dc
if 0 <= r < Grid_height and 0 <= c < Grid_width:
# Add the value, or just add one?
count += Grid[r][c]
return count
Grid[4][6] = 1
Grid[5][4] = 1
Grid[5][5] = 1
for row in range(10):
for col in range(10):
print(count_neighbors(row, col), "", end='')
print()
Prints:
$ python test.py
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 1 1 0 0
0 0 0 1 2 3 1 1 0 0
0 0 0 1 1 2 2 1 0 0
0 0 0 1 2 2 1 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
The error is exactly what it says, you need to check if the coordinates fit within the grid:
0 <= i < 10 and 0 <= j < 10
Otherwise you're trying to access an element that doesn't exist in memory, or an element that's not the one you're actually thinking about - Python handles negative indexes, they're counted from the end.
E.g. a[-1] is the last element, exactly the same as a[len(a) - 1].

Python Ignoring What is in a list?

Working on a project for CS1 that prints out a grid made of 0s and adds shapes of certain numbered sizes to it. Before it adds a shape it needs to check if A) it will fit on the grid and B) if something else is already there. The issue I am having is that when run, the function that checks to make sure placement for the shapes is valid will always do the first and second shapes correctly, but any shape added after that will only "see" the first shape added when looking for a collision. I checked to see if it wasnt taking in the right list after the first time but that doesnt seem to be it. Example of the issue....
Shape Sizes = 4, 3, 2, 1
Python Outputs:
4 4 4 4 1 2 3 0
4 4 4 4 2 2 3 0
4 4 4 4 3 3 3 0
4 4 4 4 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
It Should Output:
4 4 4 4 3 3 3 1
4 4 4 4 3 3 3 0
4 4 4 4 3 3 3 0
4 4 4 4 2 2 0 0
0 0 0 0 2 2 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
What's going on here? Full Code is below...
def binCreate(size):
binlist = [[0 for col in range(size)] for row in range(size)]
return binlist
def binPrint(lst):
for row in range(len(lst)):
for col in range(len(lst[row])):
print(lst[row][col], end = " ")
print()
def itemCreate(fileName):
lst = []
for i in open(fileName):
i = i.split()
lst = i
lst = [int(i) for i in lst]
return lst
def main():
size = int(input("Bin Size: "))
fileName = str(input("Item Size File: "))
binList = binCreate(size)
blockList = itemCreate(fileName)
blockList.sort(reverse = True)
binList = checker(binList, len(binList), blockList)
binPrint(binList)
def isSpaceFree(binList, r, c, size):
if r + size > len(binList[0]):
return False
elif c + size > len(binList[0]):
return False
for row in range(r, r + size):
for col in range(c, c + size):
if binList[r][c] != 0:
return False
elif binList[r][c] == size:
return False
return True
def checker(binList, gSize, blockList):
for i in blockList:
r = 0
c = 0
comp = False
while comp != True:
check = isSpaceFree(binList, r, c, i)
if check == True:
for x in range(c, c+ i):
for y in range(r, r+ i):
binList[x][y] = i
comp = True
else:
print(c)
print(r)
r += 1
if r > gSize:
r = 0
c += 1
if c > gSize:
print("Imcompadible")
comp = True
print(i)
binPrint(binList)
input()
return binList
Your code to test for open spaces looks in binList[r][c] (where r is a row value and c is a column value). However, the code that sets the values once an open space has been found sets binList[x][y] (where x is a column value and y is a row value).
The latter is wrong. You want to set binList[y][x] instead (indexing by row, then column).
That will get you a working solution, but it will still not be exactly what you say you expect (you'll get a reflection across the diagonal). This is because your code updates r first, then c only when r has exceeded the bin size. If you want to place items to the right first, then below, you need to swap them.
I'd suggest using two for loops for r and c, rather than a while too, but to make it work in an elegant way you'd probably need to factor out the "find one item's place" code so you could return from the inner loop (rather than needing some complicated code to let you break out of both of the nested loops).

Categories