Optimization of a Needleman Wunsch algorithm - python
Some time ago I wrote a Needleman-Wunsch algorithm for listening to NCBI data streams. Now that I'm done with it, I know it could be done better, but I can't figure out how on my own. If any of you can see what I've overcomplicated, I would really like to know how to do it better.
First, here is the full code, so that you know the context.
#!/usr/bin/env python
import pandas as pd #to create data frames
from numpy import full #to build the arrays
import subprocess
import os
from itertools import groupby
def needlemanWunsch(lista, listb, rowLen, colLen):  # O(n*m)
    """Build the unit-cost edit-distance (Levenshtein) table of two sequences.

    Cell ``[i, j]`` holds the minimum number of insertions, deletions and
    substitutions needed to turn ``lista[:i]`` into ``listb[:j]``.

    Args:
        lista: Sequence compared along the rows.
        listb: Sequence compared along the columns.
        rowLen: Number of table rows; callers pass ``len(lista) + 1``.
        colLen: Number of table columns; callers pass ``len(listb) + 1``.

    Returns:
        A ``rowLen`` x ``colLen`` integer NumPy array. The full distance is
        in the bottom-right cell.
    """
    levensteinTable = full([rowLen, colLen], 0)
    # Borders: reaching a prefix from the empty sequence costs one edit per
    # element. Slice assignment fills them in one step each; the guards keep
    # zero-sized tables from raising an IndexError.
    if rowLen > 0:
        levensteinTable[0, :] = range(colLen)
    if colLen > 0:
        levensteinTable[:, 0] = range(rowLen)
    # Each interior cell depends on its left, upper and upper-left neighbour,
    # so the table has to be filled row by row.
    for i in range(1, rowLen):
        for j in range(1, colLen):
            substitution = 0 if listb[j - 1] == lista[i - 1] else 1
            levensteinTable[i, j] = min(
                levensteinTable[i - 1, j - 1] + substitution,  # match / substitute
                levensteinTable[i, j - 1] + 1,  # step from the left cell
                levensteinTable[i - 1, j] + 1,  # step from the upper cell
            )
    return levensteinTable
# Unicode arrows used as traceback pointers in the arrow board.
upArrow = "\u2191"
right_arrow = "\u2192"
down_arrow = "\u2193"
leftArrow = "\u2190"
down_right_arrow = "\u2198"
upLeftArrow = "\u2196"


def needlemanWunschTraceBack(listx, listy, rows, columns, gapPenalty=-1,
                             matchBonus=1, mismatchPenalty=-1):  # O(n*m)
    """Fill the Needleman-Wunsch score board and its traceback arrow board.

    Args:
        listx: Sequence compared along the rows.
        listy: Sequence compared along the columns.
        rows: Number of board rows; callers pass ``len(listx) + 1``.
        columns: Number of board columns; callers pass ``len(listy) + 1``.
        gapPenalty: Score added for a step from the left or upper cell.
        matchBonus: Score added on the diagonal when the elements are equal.
        mismatchPenalty: Score added on the diagonal when they differ.

    Returns:
        ``(tracerArray, penaltyArray)``: the board of arrow characters and
        the board of scores, both of shape ``rows`` x ``columns``. The
        origin cell holds ``"-"`` and score 0.
    """
    # An integer board would truncate fractional scores on assignment, so
    # switch to a float board as soon as any scoring parameter is a float.
    usesFloat = any(
        isinstance(value, float)
        for value in (gapPenalty, matchBonus, mismatchPenalty)
    )
    penaltyArray = full([rows, columns], 0.0 if usesFloat else 0)
    tracerArray = full([rows, columns], "-")
    for row in range(rows):
        for col in range(columns):
            if row == 0 and col == 0:
                # The origin keeps its fill values: score 0, stop marker "-".
                continue
            if row == 0:
                # First row: only reachable from the left.
                score = penaltyArray[row, col - 1] + gapPenalty
                arrow = leftArrow
            elif col == 0:
                # First column: only reachable from above.
                score = penaltyArray[row - 1, col] + gapPenalty
                arrow = upArrow
            else:
                fromLeftScore = penaltyArray[row, col - 1] + gapPenalty
                fromAboveScore = penaltyArray[row - 1, col] + gapPenalty
                pairScore = (
                    matchBonus if listx[row - 1] == listy[col - 1]
                    else mismatchPenalty
                )
                diagonalLeftCellScore = penaltyArray[row - 1, col - 1] + pairScore
                score = max(fromLeftScore, fromAboveScore, diagonalLeftCellScore)
                # Ties are resolved in the order left, up, diagonal. The
                # score is always one of the three candidates, so no
                # further fallback is needed.
                if score == fromLeftScore:
                    arrow = leftArrow
                elif score == fromAboveScore:
                    arrow = upArrow
                else:
                    arrow = upLeftArrow
            tracerArray[row, col] = arrow
            penaltyArray[row, col] = score
    return tracerArray, penaltyArray
def traceback_alignment(traceback_array, listC, listD, up_arrow=upArrow,
                        left_arrow=leftArrow, up_left_arrow=upLeftArrow,
                        stop="-"):  # O(n)
    """Follow the traceback arrows and render the resulting alignment.

    The walk starts at cell ``[len(listC), len(listD)]`` and follows the
    arrows until it reads the ``stop`` marker. A progress line is printed
    for every cell read inside the loop.

    Args:
        traceback_array: 2-D array of arrow characters indexed ``[row, col]``.
        listC: Sequence along the rows; rendered as the upper line.
        listD: Sequence along the columns; rendered as the lower line.
        up_arrow: Marker for "gap in the lower sequence".
        left_arrow: Marker for "gap in the upper sequence".
        up_left_arrow: Marker for "both sequences advance".
        stop: Marker that ends the walk.

    Returns:
        Three lines joined by newlines: aligned ``listC``, an indicator
        line (``"|"`` under equal pairs, ``" "`` elsewhere), aligned
        ``listD``.

    Raises:
        ValueError: If a cell holds none of the four markers.
    """
    row = len(listC)
    col = len(listD)
    # Pieces are collected back-to-front and reversed once at the end;
    # prepending to a string on every step would copy it each time.
    upperParts = []
    indicatorParts = []
    lowerParts = []
    # The start cell is checked against ``stop`` (not a hard-coded "-") so
    # that a caller-supplied stop marker is honoured.
    arrow = traceback_array[row, col]
    while arrow != stop:
        arrow = traceback_array[row, col]
        print(f"Currently on row: {row} and col: {col}; Arrow: {arrow}")
        if arrow == up_arrow:
            # Gap in the lower sequence.
            upperParts.append(listC[row - 1])
            indicatorParts.append(" ")
            lowerParts.append("-")
            row -= 1
        elif arrow == up_left_arrow:
            # Both sequences advance; mark whether the pair is equal.
            upperParts.append(listC[row - 1])
            indicatorParts.append("|" if listC[row - 1] == listD[col - 1] else " ")
            lowerParts.append(listD[col - 1])
            row -= 1
            col -= 1
        elif arrow == left_arrow:
            # Gap in the upper sequence.
            upperParts.append("-")
            indicatorParts.append(" ")
            lowerParts.append(listD[col - 1])
            col -= 1
        elif arrow == stop:
            break
        else:
            raise ValueError(
                f"Traceback array entry at {row},{col}: {arrow} "
                f"is not recognized as an up arrow ({up_arrow}), left_arrow ({left_arrow}), "
                f"up_left_arrow ({up_left_arrow}), or a stop ({stop})."
            )
    alignedSeq1 = "".join(reversed(upperParts))
    alignmentIndicator = "".join(reversed(indicatorParts))
    alignedSeq2 = "".join(reversed(lowerParts))
    return f"{alignedSeq1}\n{alignmentIndicator}\n{alignedSeq2}"
def seqHandle(seq1, seq2):  # O(n*m)
    """Build the full text report for one pair of sequences.

    Args:
        seq1: First sequence; it labels the rows of every board.
        seq2: Second sequence; it labels the columns of every board.

    Returns:
        ``(report, levensteinDistance)``: the report text (distance board,
        arrow board, penalty board and rendered alignment) and the value of
        the bottom-right cell of the distance board.
    """
    # "-" labels the extra leading row/column that stands for the empty prefix.
    rowLabels = list("-" + seq1)
    columnLabels = list("-" + seq2)
    nRows = len(rowLabels)
    nColumns = len(columnLabels)
    levensteinBoard = needlemanWunsch(seq1, seq2, nRows, nColumns)
    arrowArray, penaltyBoard = needlemanWunschTraceBack(seq1, seq2, nRows, nColumns)
    levensteinDistance = levensteinBoard[len(seq1)][len(seq2)]

    def asFrame(board):
        # Attach the sequence characters as axis labels for printing.
        return pd.DataFrame(board, index=rowLabels, columns=columnLabels)

    report = (
        f"This is our ScoreBoard with all the important distances\n"
        f"{asFrame(levensteinBoard)}\n"
        f"The Levenstein Distance of {seq1} and {seq2} is {levensteinDistance}.\n"
        f"The trace back arrow board:\n{asFrame(arrowArray)}\n"
        f"The Penalty Score Board:\n{asFrame(penaltyBoard)}\n"
        f"{traceback_alignment(arrowArray, seq1, seq2)}"
    )
    return report, levensteinDistance
def fileProcessGenerator(fileE):  # O(n)
    """Yield ``(name, sequence)`` pairs from a FASTA-style file.

    Lines are grouped into alternating runs of header lines (first
    character ``">"``) and sequence lines. The first line of a header run
    supplies the name (its first whitespace-separated word); the lines of
    the following run are stripped and concatenated into the sequence.

    Args:
        fileE: Path of the file. Relative paths are resolved against the
            current working directory; absolute paths are used as given.

    Yields:
        Tuples ``(name, sequence)``, one per header/sequence pair.

    Raises:
        RuntimeError: If a header run is not followed by sequence lines.
    """
    # os.path.join anchors relative names at the cwd but leaves absolute
    # paths intact; gluing the cwd on with an f-string broke absolute paths.
    path = os.path.join(os.getcwd(), fileE)
    with open(path, 'r') as fh:
        # NOTE(review): the first run is treated as a header run, so this
        # assumes the file starts with a ">" line - confirm for your inputs.
        runs = (run for _, run in groupby(fh, lambda line: line[0] == ">"))
        for headerRun in runs:
            headerStr = next(headerRun)[1:].strip()
            name = headerStr.replace('>', '').split()[0]
            try:
                sequenceLines = next(runs)
            except StopIteration:
                # Raised explicitly rather than relying on the implicit
                # StopIteration -> RuntimeError conversion in generators.
                raise RuntimeError(
                    f"header {name!r} is not followed by a sequence"
                ) from None
            yield (name, ''.join(s.strip() for s in sequenceLines))
def FileOfSequencesAnalisis(fileName, OutputPath):  # O(n*m*a*b)
    """Compare every ordered pair of distinct records in a sequence file.

    For each pair a report is written to
    ``<OutputPath>/For_<name>_and_<other name>.txt`` and the running
    average of the Levenshtein distances is printed.

    Args:
        fileName: Sequence file, passed on to ``fileProcessGenerator``.
        OutputPath: Directory that receives the per-pair report files.

    Returns:
        None. Progress and the final average are printed.
    """
    print(f"\n\n Warning: I'll proceed really a lot of Sequence combinations Sequences.\n\n This will take Time! Please stay patient")
    processnumber = 1  # number of the pair currently being processed
    levensteinDistanceCounter = 0  # sum of all distances so far
    LevensteinDistancesAverage = 0  # running average of the distances
    try:
        # Parse the file once instead of re-reading it for every outer
        # record. A malformed file is therefore reported before any report
        # file is written.
        records = list(fileProcessGenerator(fileName))
        for ff in records:
            for fo in records:
                if ff == fo:
                    # Identical records are not compared with each other.
                    continue
                name, seqOne = ff
                namel, seqTwo = fo
                print(
                    f"processing for {name} and {namel} -Process Nr.{processnumber}\n"
                    f"current average Levenstein Distances is {LevensteinDistancesAverage}")
                seqHandling, currentLevensteinDistance = seqHandle(seqOne, seqTwo)
                # The report contains Unicode arrows, which the locale's
                # default encoding may not be able to represent.
                with open(f"{OutputPath}/For_{name}_and_{namel}.txt", 'w',
                          encoding="utf-8") as reportFile:
                    reportFile.write(f"Analysis for {name} and {namel}: \n{seqHandling}")
                levensteinDistanceCounter += currentLevensteinDistance
                LevensteinDistancesAverage = levensteinDistanceCounter / processnumber
                processnumber += 1
    except RuntimeError:
        print("invalid File. Use a File with inherited fasta Sequences with a Header Line and Sequences")
    print(f"I'm done. Here is the average Levenstein Distance of the analysed File:\n{LevensteinDistancesAverage}")
Now I'll show you the part I'm unsatisfied with:
def needlemanWunsch(lista, listb, rowLen, colLen):  # O(n*m)
    """Return the unit-cost edit-distance table for ``lista`` and ``listb``.

    ``lista`` runs along the rows and ``listb`` along the columns of the
    ``rowLen`` x ``colLen`` table; each cell is the minimum over a diagonal
    step (0 for equal elements, 1 otherwise), a step from the left (+1)
    and a step from above (+1).
    """
    board = full([rowLen, colLen], 0)
    # The first row and first column simply count up from 0.
    for col in range(colLen):
        board[0, col] = col
    for row in range(rowLen):
        board[row, 0] = row
    # Interior cells, filled row by row.
    for row in range(1, rowLen):
        for col in range(1, colLen):
            if listb[col - 1] == lista[row - 1]:
                mismatch = 0
            else:
                mismatch = 1
            viaDiagonal = board[row - 1, col - 1] + mismatch
            viaLeft = board[row, col - 1] + 1
            viaAbove = board[row - 1, col] + 1
            board[row, col] = min(viaDiagonal, viaLeft, viaAbove)
    return board
I've tried to turn every for loop into a comprehension, but there are things I don't know, and I don't even know what kind of comprehension I need to search for.
from numpy import full #to build the arrays
def needlemanWunsch(lista, listb, rowLen, colLen): #o(n*m)
    # Attempted rewrite of the border initialisation with comprehensions.
    # The interior loop is unchanged; only the two border loops were replaced.
    levensteinTable = full([rowLen, colLen],0)
    #for j in range(0,colLen): #Fill out the first row
    #    levensteinTable[0, j] = j
    # NOTE: the next line only *reads* row 0 through an index list and throws
    # the result away. There is no "=", so nothing is stored and row 0 keeps
    # the fill value 0. An assignment such as
    # `levensteinTable[0, :] = range(colLen)` would be needed to fill it.
    levensteinTable[0, [j for j in range(0, colLen)]]
    #for i in range(0,rowLen): #Fill out the first column
    #    levensteinTable[i, 0] = i
    # NOTE: same problem as above - column 0 is read and discarded, never set.
    levensteinTable[[i for i in range(0, rowLen)], 0]
    for i in range(1,rowLen): #Fill out the rest of the Board in dependency of several cases
        for j in range (1,colLen): #traversing with a nested loop
            levensteinTable[i,j]=min( #every Levenstein Score has to be the Minimum of three cases
                (levensteinTable[i-1, j-1]+(0 if listb[j-1] == lista[i-1] else 1)), #in this part, the Delta of the two strings will be detected
                (levensteinTable[i, j-1] +1), #The score of the left cell
                (levensteinTable[i-1, j] +1) #The Score of the upper cell
            )
    return levensteinTable #the finished levenstein Table will be returned
# Driver: the table gets one extra row and column for the empty prefix ("-").
seq1, seq2= "ATTACA","ATGCT"
nRows, nColumns = len("-"+seq1), len("-"+seq2)
print(needlemanWunsch(seq1, seq2, nRows, nColumns))
But the output of this is wrong: the first row and the first column stay at 0 instead of counting up:
[[0 0 0 0 0 0]
[0 0 1 1 1 1]
[0 1 0 1 2 1]
[0 1 1 1 2 2]
[0 0 1 2 2 3]
[0 1 1 2 2 3]
[0 0 1 2 3 3]]
Related
Including backtrace pointers with symbols in a Python output of a DP table for min edit distance algorythm
I have a working min edit distance program for two words that also iterates and outputs a DP table to the console, however I wish to add a backtrace to the programme that also prints arrow pointer symbols in the DP table to show the backtrace clearly on the output. I cannot figure out how to output these symbols correctly. import re import time time1= time.time() def printTable(table, description): print(f'{description}\n') current_row = current_col = 0 current_row_col = re.search("^row ([0-9]+) , col ([0-9]+)$",description) if current_row_col: current_row = int(current_row_col.group(1)) current_col= int(current_row_col.group(2)) row_counter=0 for row in table: row_counter+=1 col_counter=0 for col in row: col_counter+=1 #print(row_counter , row, current_col, col) if (row_counter == current_row) and (col_counter == current_col): formatting = '\033[1m'+'\033[91m' #bold + red else: formatting = '\x1b[0m' #reset fomatting print(formatting + str(col).rjust(10, ' '), end=' ') # rjust returns a 10-characters long, right justified version of the string print('\n\n') print('---------------------------------------------------------------------------------------------------------------') # A DP-based solution for edit distance problem def editDistDP(x,y): leftarrow = "←" uparrow = "↑" diagarrow = "" dp = [] # Create an empty table to store results of subproblems # fill in the table with zeros for row in range(len(x) + 1): dp.append([0]* (len(y) + 1)) # Alternatively, you can use List Comprehension to initiate the DP table in one line of code # dp = [[0 for column in range(len(y) + 1)] for row in range(len(x) + 1)] # Fill in the base case (easy) subproblems, i.e. 
the first row and column of the DP table # first row: base case subproblems for computing the cost of converting "" to y for i in range(len(y) + 1): # If x is empty then the only option is to insert all the characters of y # Minimum number of required operations (cost) is i insertions, where i = len(y) dp[0][i] = i # first column: base case subproblems for computing the cost of converting x to "" for i in range(len(x) + 1): # If y is empty then the only option is to delete all the characters of x # Minimum number of required operations (cost) is i deletions, where i = len(x) dp[i][0] = i printTable(dp,"DP table after the base case (easy) subproblems are solved"); # Fill in the rest of the DP table in a BOTTOM-UP manner for i in range(1, len(x) + 1): for j in range(1, len(y) + 1): horizontal_or_insertion_cost = (dp[i][j-1] + 1) vertical_or_deletion_cost= dp[i-1][j] + 1 # Weighted Minimum Edit Distance for sub if x[i-1] != y[j-1] and x[i-1].isnumeric: delta = 3 elif x[i-1] != y[j-1]: delta = 2 else: delta = 0 diagonal_or_substitution_cost= dp[i-1][j-1] + delta minValue = min(horizontal_or_insertion_cost,vertical_or_deletion_cost,diagonal_or_substitution_cost) dp[i][j] = minValue # printTable(dp,f'row {i+1} , col {j+1}') #UNCOMMENT this line to see how the DP table is filled at each step printTable(dp,"Completed DP table after all the subproblems are solved") return dp[-1][-1] str1, str2 = "intention", "execution" print(f'edit distance between "{str1}" and "{str2}": {editDistDP(str1, str2)}') time2 = time.time() execTime = time2-time1 execTime = str(execTime) print("--- Executed in: " + execTime + " seconds ---")
First Search Program - Artificial Intelligence for Robotics path printing
I'm new at Python programming and I'm doing my best to fully understand this code. Here we are printing the path for the First Search Program - Artificial Intelligence for Robotics algorithm. I know how the basic of these lines are working in general, but how they work here in this code. Could I get some clarification for this piece of code below, please?. This is below the piece of code: policy=[[' ' for row in range(len(grid[0]))] for col in range(len(grid))] x=goal[0] y=goal[1] policy[x][y]='*' while x !=init[0] or y !=init[1]: x2=x-delta[action[x][y]][0] y2=y-delta[action[x][y]][1] policy[x2][y2]= delta_name[action[x][y]] x=x2 y=y2 for i in range(len(policy)): print(policy[i]) This is the code: #grid format # 0 = navigable space # 1 = occupied space grid = [[0,0,1,0,0,0], [0,0,1,0,0,0], [0,0,0,0,1,0], [0,0,1,1,1,0], [0,0,0,0,1,0]] init = [0,0] #Start location is (0,0) which we put it in open list. goal = [len(grid)-1,len(grid[0])-1] #Our goal in (4,5) and here are the coordinates of the cell. #Below the four potential actions to the single field delta = [[-1 , 0], #up by subtracting one from the first dimention, I mean the demension of (0,0) [ 0 ,-1], #left [ 1 , 0], #down [ 0 , 1]] #right delta_name = ['^','<','V','>'] #The name of above actions cost = 1 #Each step costs you one def search(): #open list elements are of the type [g,x,y] #To check cells once they expanded and don't expand them again. We defined an array called closed #and its size as our grid. It has two values 0 & 1. 0 means open and 1 means closed. 
closed = [[0 for row in range(len(grid[0]))] for col in range(len(grid))] action=[[-1 for row in range(len(grid[0]))] for col in range(len(grid))] #We initialize the starting location as checked closed[init[0]][init[1]] = 1 # we assigned the cordinates and g value x = init[0] y = init[1] g = 0 #our open list will contain our initial value open = [[g, x, y]] found = False #flag that is set when search complete resign = False #Flag set if we can't find expand #print('initial open list:') #for i in range(len(open)): #print(' ', open[i]) #print('----') while found is False and resign is False: #Check if we still have elements in the open list if len(open) == 0: #If our open list is empty, there is nothing to expand. resign = True print('Fail') print('############# Search terminated without success') else: #if there is still elements on our list #remove node from list open.sort() #sort elements in an increasing order from the smallest g value up open.reverse() #reverse the list next = open.pop() #remove the element with the smallest g value from the list #print('list item') #print('next') #Then we assign the three values to x,y and g. Which is our expantion. x = next[1] y = next[2] g = next[0] #Check if we are done if x == goal[0] and y == goal[1]: found = True print(next) #The three elements above this "if". 
print('############## Search is success') else: #expand winning element and add to new open list for i in range(len(delta)): #going through all our actions the four actions #We apply the actions to x and y with additional delta to construct x2 and y2 x2 = x + delta[i][0] y2 = y + delta[i][1] #if x2 and y2 falls into the grid if x2 >= 0 and x2 < len(grid) and y2 >=0 and y2 <= len(grid[0])-1: #if x2 and y2 not checked yet and there is not obstacles if closed[x2][y2] == 0 and grid[x2][y2] == 0: g2 = g + cost #we increment the cose open.append([g2,x2,y2]) #we add them to our open list #print('append list item') #print([g2,x2,y2]) #Then we check them to never expand again closed[x2][y2] = 1 action[x2][y2]=i policy=[[' ' for row in range(len(grid[0]))] for col in range(len(grid))] x=goal[0] y=goal[1] policy[x][y]='*' while x !=init[0] or y !=init[1]: x2=x-delta[action[x][y]][0] y2=y-delta[action[x][y]][1] policy[x2][y2]= delta_name[action[x][y]] x=x2 y=y2 for i in range(len(policy)): print(policy[i]) search()
How to add a stopping condition for Jacobian Matrix?
def jacobi(m,numiter=100): #Number of rows determins the number of variables numvars = m.shape[0] #construct array for final iterations history = np.zeros((numvars,numiter)) i = 1 while(i < numiter): #Loop for numiter for v in range(numvars): # Loop over all variables current = m[v,numvars] # Start with left hand side (augmented side of matrix) for col in range(numvars): #Loop over columns if v != col: # Don't count colume for current variable current = current - (m[v,col]*history[col, i-1]) #subtract other guesses form previous timestep current = current/m[v,v] #divide by current variable coefficent history[v,i] = current #Add this answer to the rest i = i + 1 #iterate #plot each variable for v in range(numvars): plt.plot(history[v,: i]); return history[:,i-1] I have this code that calculates Jacobian method. How do I add a stopping condition for when the solutions converge? i.e. the values for the current iteration have changed less than some threshold e from the values for the previous iteration. The threshold e will be an input to the function and the default value to 0.00001
You could add another condition to your while loop, so when it reaches your error threshold it stops. def jacobi(m,numiter=100, error_threshold = 1e-4): #Number of rows determins the number of variables numvars = m.shape[0] #construct array for final iterations history = np.zeros((numvars,numiter)) i = 1 err = 10*error_threshold while(i < numiter and err > error_threshold): #Loop for numiter and error threshold for v in range(numvars): # Loop over all variables current = m[v,numvars] # Start with left hand side (augmented side of matrix) for col in range(numvars): #Loop over columns if v != col: # Don't count colume for current variable current = current - (m[v,col]*history[col, i-1]) #subtract other guesses form previous timestep current = current/m[v,v] #divide by current variable coefficent history[v,i] = current #Add this answer to the rest #check error here. In this case the maximum error if i > 1: err = max((history[:,i] - history[:,i-1])/history[:,i-1]) i = i + 1 #iterate #plot each variable for v in range(numvars): plt.plot(history[v,: i]); return history[:,i-1]
Spawning objects in groups when the first object of the group was spawned randomly Python
I'm currently doing a project, and in the code I have, I'm trying to get trees .*. and mountains .^. to spawn in groups around the first tree or mountain which is spawned randomly, however, I can't figure out how to get the trees and mountains to spawn in groups around a single randomly generated point. Any help? grid = [] def draw_board(): row = 0 for i in range(0,625): if grid[i] == 1: print("..."), elif grid[i] == 2: print("..."), elif grid[i] == 3: print(".*."), elif grid[i] == 4: print(".^."), elif grid[i] == 5: print("[T]"), else: print("ERR"), row = row + 1 if row == 25: print ("\n") row = 0 return
There's a number of ways you can do it. Firstly, you can just simulate the groups directly, i.e. pick a range on the grid and fill it with a specific figure. def generate_grid(size): grid = [0] * size right = 0 while right < size: left = right repeat = min(random.randint(1, 5), size - right) # * right = left + repeat grid[left:right] = [random.choice(figures)] * repeat return grid Note that the group size need not to be uniformly distributed, you can use any convenient distribution, e.g. Poisson. Secondly, you can use a Markov Chain. In this case group lengths will implicitly follow a Geometric distribution. Here's the code: def transition_matrix(A): """Ensures that each row of transition matrix sums to 1.""" copy = [] for i, row in enumerate(A): total = sum(row) copy.append([item / total for item in row]) return copy def generate_grid(size): # Transition matrix ``A`` defines the probability of # changing from figure i to figure j for each pair # of figures i and j. The grouping effect can be # obtained by setting diagonal entries A[i][i] to # larger values. # # You need to specify this manually. A = transition_matrix([[5, 1], [1, 5]]) # Assuming 2 figures. grid = [random.choice(figures)] for i in range(1, size): current = grid[-1] next = choice(figures, A[current]) grid.append(next) return grid Where the choice function is explained in this StackOverflow answer.
How to deal with very big Bitboards
I'm working on a 2-player board game (e.g. connect 4), with parametric board size h, w. I want to check for winning condition using hw-sized bitboards. In game like chess, where board size is fixed, bitboards are usually represented with some sort of 64-bit integer. When h and w are not constant and maybe very big (let's suppose 30*30) are bitboards a good idea? If so, are the any data types in C/C++ to deal with big bitboards keeping their performances? Since I'm currently working on python a solution in this language is appreciated too! :) Thanks in advance
I wrote this code while ago just to play around with the game concept. There is no intelligence behaviour involve. just random moves to demonstrate the game. I guess this is not important for you since you are only looking for a fast check of winning conditions. This implementation is fast since I did my best to avoid for loops and use only built-in python/numpy functions (with some tricks). import numpy as np row_size = 6 col_size = 7 symbols = {1:'A', -1:'B', 0:' '} def was_winning_move(S, P, current_row_idx,current_col_idx): #****** Column Win ****** current_col = S[:,current_col_idx] P_idx= np.where(current_col== P)[0] #if the difference between indexes are one, that means they are consecutive. #we need at least 4 consecutive index. So 3 Ture value is_idx_consecutive = sum(np.diff(P_idx)==1)>=3 if is_idx_consecutive: return True #****** Column Win ****** current_row = S[current_row_idx,:] P_idx= np.where(current_row== P)[0] is_idx_consecutive = sum(np.diff(P_idx)==1)>=3 if is_idx_consecutive: return True #****** Diag Win ****** offeset_from_diag = current_col_idx - current_row_idx current_diag = S.diagonal(offeset_from_diag) P_idx= np.where(current_diag== P)[0] is_idx_consecutive = sum(np.diff(P_idx)==1)>=3 if is_idx_consecutive: return True #****** off-Diag Win ****** #here 1) reverse rows, 2)find new index, 3)find offest and proceed as diag reversed_rows = S[::-1,:] #1 new_row_idx = row_size - 1 - current_row_idx #2 offeset_from_diag = current_col_idx - new_row_idx #3 current_off_diag = reversed_rows.diagonal(offeset_from_diag) P_idx= np.where(current_off_diag== P)[0] is_idx_consecutive = sum(np.diff(P_idx)==1)>=3 if is_idx_consecutive: return True return False def move_at_random(S,P): selected_col_idx = np.random.permutation(range(col_size))[0] #print selected_col_idx #we should fill in matrix from bottom to top. 
So find the last filled row in col and fill the upper row last_filled_row = np.where(S[:,selected_col_idx] != 0)[0] #it is possible that there is no filled array. like the begining of the game #in this case we start with last row e.g row : -1 if last_filled_row.size != 0: current_row_idx = last_filled_row[0] - 1 else: current_row_idx = -1 #print 'col[{0}], row[{1}]'.format(selected_col,current_row) S[current_row_idx, selected_col_idx] = P return (S,current_row_idx,selected_col_idx) def move_still_possible(S): return not (S[S==0].size == 0) def print_game_state(S): B = np.copy(S).astype(object) for n in [-1, 0, 1]: B[B==n] = symbols[n] print B def play_game(): #initiate game state game_state = np.zeros((6,7),dtype=int) player = 1 mvcntr = 1 no_winner_yet = True while no_winner_yet and move_still_possible(game_state): #get player symbol name = symbols[player] game_state, current_row, current_col = move_at_random(game_state, player) #print '******',player,(current_row, current_col) #print current game state print_game_state(game_state) #check if the move was a winning move if was_winning_move(game_state,player,current_row, current_col): print 'player %s wins after %d moves' % (name, mvcntr) no_winner_yet = False # switch player and increase move counter player *= -1 mvcntr += 1 if no_winner_yet: print 'game ended in a draw' player = 0 return game_state,player,mvcntr if __name__ == '__main__': S, P, mvcntr = play_game() let me know if you have any question UPDATE: Explanation: At each move, look at column, row, diagonal and secondary diagonal that goes through the current cell and find consecutive cells with the current symbol. avoid scanning the whole board. 
extracting cells in each direction: column: current_col = S[:,current_col_idx] row: current_row = S[current_row_idx,:] Diagonal: Find the offset of the desired diagonal from the main diagonal: diag_offset = current_col_idx - current_row_idx current_diag = S.diagonal(offset) off-diagonal: Reverse the rows of matrix: S_reversed_rows = S[::-1,:] Find the row index in the new matrix new_row_idx = row_size - 1 - current_row_idx current_offdiag = S.diagonal(offset)