Pacman minimax algorithm - my algorithm makes the wrong choice - python

When i do the task - I pass all the tests except from the fact that my choice is not the same as the optimal. I dont understand why that is - or what i need to change in my code. I have tried everything, but there seeems to be a fundamental error somewhere in the algorith that makes my algorith choose Right even tho the best action is Left
def max_value(self, gameState, depth, agentIndex):
pacIndex = 0 #packmans index
bound = float('-inf') #create an upper bound
#check if we have a terminal state or leaf node
if gameState.isWin() or gameState.isLose() or depth == self.depth:
return self.evaluationFunction(gameState), None
if gameState.getNumAgents() == 0:
value, a = self.max_value(gameState, depth + 1,agentIndex)
#check for all childrennodes for packman
for action in gameState.getLegalActions(pacIndex):
#find each of the succesor from the action node
sucessor = gameState.generateSuccessor(pacIndex,action)
#now we minimize the next agent (ghost)
ghost_index = pacIndex + 1
value, result = self.min_value(sucessor,depth,ghost_index)
if value > bound: #if we have a higher one
bound = max(value,bound) #new boud, max of what we have found
maxresult = action
#return (bound,maxresult) [depth == 1]#return actions done for depth 1 now we move down
return bound, maxresult
def min_value (self, gameState, depth ,agentIndex):
bound = float('inf') #lower bound inifinity
minresult = None
agentNumber = gameState.getNumAgents() # find out whos turn it is
#check if we have terminal state
if gameState.isLose() or depth == self.depth:
return self.evaluationFunction(gameState), None
#get all the legal actions for the agent we are currently on
for action in gameState.getLegalActions(agentIndex):
#create the next nodes one step down
sucessor = gameState.generateSuccessor(agentIndex, action)
#check if there are ghosts left
if agentIndex == agentNumber-1:
#check if there are one the last node
#then we want to maximise for the packman #recccursive lol
pacIndex = 0
value, maxresult = self.max_value(sucessor,depth + 1,pacIndex)
else:
#for the ghosts
value, result = self.min_value(sucessor,depth,agentIndex+1)
if value < bound :
bound = min(value,bound)
minresult = action
return bound, minresult
#now the minmax function
def minimax(self,gameState):
depth = 0
a, maxresult = self.max_value(gameState, 0, 0)
return maxresult
```

Related

There is 8 puzzle game using a* algo. i want to set traversal limit of 2000 traversal in python

this is sample code of 8 puzzle game which take two matrix initial and goal state .
class Node:
def __init__(self,data,level,fval):
""" Initialize the node with the data, level of the node and the calculated fvalue """
self.data = data
self.level = level
self.fval = fval
def generate_child(self):
""" Generate child nodes from the given node by moving the blank space
either in the four directions {up,down,left,right} """
x,y = self.find(self.data,'_')
""" val_list contains position values for moving the blank space in either of
the 4 directions [up,down,left,right] respectively. """
val_list = [[x,y-1],[x,y+1],[x-1,y],[x+1,y]]
children = []
for i in val_list:
child = self.shuffle(self.data,x,y,i[0],i[1])
if child is not None:
child_node = Node(child,self.level+1,0)
children.append(child_node)
return children
def shuffle(self,puz,x1,y1,x2,y2):
""" Move the blank space in the given direction and if the position value are out
of limits the return None """
if x2 >= 0 and x2 < len(self.data) and y2 >= 0 and y2 < len(self.data):
temp_puz = []
temp_puz = self.copy(puz)
temp = temp_puz[x2][y2]
temp_puz[x2][y2] = temp_puz[x1][y1]
temp_puz[x1][y1] = temp
return temp_puz
else:
return None
def copy(self,root):
""" Copy function to create a similar matrix of the given node"""
temp = []
for i in root:
t = []
for j in i:
t.append(j)
temp.append(t)
return temp
def find(self,puz,x):
""" Specifically used to find the position of the blank space """
for i in range(0,len(self.data)):
for j in range(0,len(self.data)):
if puz[i][j] == x:
return i,j
class Puzzle:
def __init__(self,size):
""" Initialize the puzzle size by the specified size,open and closed lists to empty """
self.n = size
self.open = []
self.closed = []
def accept(self):
""" Accepts the puzzle from the user """
puz = []
for i in range(0,self.n):
temp = input().split(" ")
puz.append(temp)
return puz
def f(self,start,goal):
""" Heuristic Function to calculate hueristic value f(x) = h(x) + g(x) """
return self.h(start.data,goal)+start.level
def h(self,start,goal):
""" Calculates the different between the given puzzles """
temp = 0
for i in range(0,self.n):
for j in range(0,self.n):
if start[i][j] != goal[i][j] and start[i][j] != '_':
temp += 1
return temp
def process(self):
""" Accept Start and Goal Puzzle state"""
print("Enter the start state matrix \n")
start = self.accept()
print("Enter the goal state matrix \n")
goal = self.accept()
start = Node(start,0,0)
start.fval = self.f(start,goal)
""" Put the start node in the open list"""
self.open.append(start)
print("\n")
count=0
while True:
cur = self.open[0]
count=count+1
print("This Node number = \n", count)
print("")
print(" | ")
print(" | ")
print(" \\\'/ \n")
for i in cur.data:
for j in i:
print(j,end=" ")
print("")
""" If the difference between current and goal node is 0 we have reached the goal node"""
if(self.h(cur.data,goal) == 0):
break
for i in cur.generate_child():
i.fval = self.f(i,goal)
self.open.append(i)
self.closed.append(cur)
del self.open[0]
""" sort the opne list based on f value """
self.open.sort(key = lambda x:x.fval,reverse=False)
puz = Puzzle(3)
puz.process()
this code take initial state and goal state and start traversal and stop until
specific or reached goal state
i want to add limit of traversal to this code.so it itterate in
specific boundary either reached goal state or not

Mancala AI completes the game and wins in one round instead of playing one turn

I'm working on a Mancala game where players get to play against an AI. the code is complete, the Mancala game functionality is found within the Mancala_helpers, the AI algorithm is a MinMax tree and is found in the MinMax file, and finally the game itself. everything runs fine except for when the AI plays, if the AI starts the game immediately ends, it moves all the rocks from its pits in one round. and if I start I can only play one move before it does the same. I cannot understand what's happening, at first, I thought maybe I had a problem within the function of mancala helpers where they did not switch turns properly and the AI kept playing. but I ran multiple tests and that part is working fine. I cant identify the issue, help, please. if anyone also has suggestions for a better evaluation function then that would be great. thanks
--------------------------Mancala helpers--------------
# TODO: implement pad(num)
# Return a string representation of num that is always two characters wide.
# If num does not already have two digits, a leading "0" is inserted in front.
# This is called "padding". For example, pad(12) is "12", and pad(1) is "01".
# You can assume num is either one or two digits long.
def pad(num: int) -> str:
x = str(num)
if len(x) > 1:
return x
else:
return "0"+x
# TODO: implement pad_all(nums)
# Return a new list whose elements are padded versions of the elements in nums.
# For example, pad_all([12, 1]) should return ["12", "01"].
# Your code should create a new list, and not modify the original list.
# You can assume each element of nums is an int with one or two digits.
def pad_all(nums: list) -> list:
x = []
for i in nums:
x.append(pad(i))
return x
# TODO: implement initial_state()
# Return a (player, board) tuple representing the initial game state
# The initial player is player 0.
# board is list of ints representing the initial mancala board at the start of the game.
# The list element at index p should be the number of gems at position p.
def initial_state() -> tuple:
return (0, [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0])
# TODO: implement game_over(state)
# Return True if the game is over, and False otherwise.
# The game is over once all pits are empty.
# Your code should not modify the board list.
# The built-in functions "any" and "all" may be useful:
# https://docs.python.org/3/library/functions.html#all
def game_over(state: tuple) -> bool:
lst = state[1]
if (lst[0] == lst[1] == lst[2] == lst[3] == lst[4] == lst[5] == 0) or (lst[7] == lst[8] == lst[9] == lst[10] == lst[11] == lst[12] == 0):
return True
else:
return False
# TODO: implement valid_actions(state)
# state is a (player, board) tuple
# Return a list of all positions on the board where the current player can pick up gems.
# A position is a valid move if it is one of the player's pits and has 1 or more gems in it.
# For example, if all of player's pits are empty, you should return [].
# The positions in the returned list should be ordered from lowest to highest.
# Your code should not modify the board list.
def valid_actions(state: tuple) -> list:
actions = []
lst = state[1]
player = state[0]
if player == 0:
for i in range(6):
if lst[i] > 0:
actions.append(i)
return actions
else:
for i in range(6):
if lst[i+7] >0: actions.append(i+7)
return actions
# TODO: implement mancala_of(player)
# Return the numeric position of the given player's mancala.
# Player 0's mancala is on the right and player 1's mancala is on the left.
# You can assume player is either 0 or 1.
def mancala_of(player: int) -> int:
if player ==0: return 6
elif player==1: return 13
# TODO: implement pits_of(player)
# Return a list of numeric positions corresponding to the given player's pits.
# The positions in the list should be ordered from lowest to highest.
# Player 0's pits are on the bottom and player 1's pits are on the top.
# You can assume player is either 0 or 1.
def pits_of(player: int) -> list:
if player ==0:
return [0,1,2,3,4,5]
elif player==1:
return [7,8,9,10,11,12]
# TODO: implement player_who_can_do(move)
# Return the player (either 0 or 1) who is allowed to perform the given move.
# The move is allowed if it is the position of one of the player's pits.
# For example, position 2 is one of player 0's pits.
# So player_who_can_do(2) should return 0.
# You can assume that move is a valid position for one of the players.
def player_who_can_do(move: int) -> int:
if move in [0,1,2,3,4,5] : return 0
elif move in [7,8,9,10,11,12]: return 1
# TODO: implement opposite_from(position)
# Return the position of the pit that is opposite from the given position.
# Check the pdf instructions for the definition of "opposite".
def opposite_from(position: int) -> int:
d_p_1 = {}
d_p_1[0]=12
d_p_1[1]=11
d_p_1[2]=10
d_p_1[3]=9
d_p_1[4]=8
d_p_1[5]=7
d_p_1[7]=5
d_p_1[8]=4
d_p_1[9]=3
d_p_1[10]=2
d_p_1[11]=1
d_p_1[12]=0
return d_p_1[position]
# TODO: implement play_turn(move, board)
# Return the new game state after the given move is performed on the given board.
# The return value should be a tuple (new_player, new_board).
# new_player should be the player (0 or 1) whose turn it is after the move.
# new_board should be a list representing the new board state after the move.
#
# Parameters:
# board is a list representing the current state of the game board before the turn is taken.
# move is an int representing the position where the current player picks up gems.
# You can assume that move is a valid move for the current player who is taking their turn.
# Check the pdf instructions for the detailed rules of taking a turn.
#
# It may be helpful to use several of the functions you implemented above.
# You will also need control flow such as loops and if-statements.
# Lastly, the % (modulo) operator may be useful:
# (x % y) returns the remainder of x / y
# from: https://docs.python.org/3/library/stdtypes.html#numeric-types-int-float-complex
def play_turn(move: int, board: list) -> tuple:
player = player_who_can_do(move)
new_board = board
gems = new_board[move]
new_board[move] = 0
hasht = {}
hasht[0] =1
hasht[1] = 0
if player ==0:
x =0
offset = 1
gems_counter = gems
for i in range(gems):
if i + move + offset == 13: offset += 1
elif (i+move+offset) - 14 == 13: offset += 1
if i + move +offset > 13:
gem_position = (i+move+offset) - 14
else:
gem_position = i + move + offset
new_board[gem_position] += 1
gems_counter -= 1
if gems_counter ==0 and gem_position==6: x = 1
if gems_counter==0 and gem_position in pits_of(0) and new_board[gem_position] == 1 and new_board[opposite_from(gem_position)] > 0:
gems_from_myside = new_board[gem_position]
gems_from_opside = new_board[opposite_from(gem_position)]
new_board[6] = gems_from_myside+gems_from_opside
new_board[gem_position] = 0
new_board[opposite_from(gem_position)] = 0
return (hasht[x],new_board)
if player ==1:
x_2 = 1
offset = 1
gems_counter2 = gems
for i in range(gems):
if i + move + offset == 6: offset += 1
elif (i+move+offset) - 14 == 6: offset += 1
if i + move +offset > 13:
gem_position = (i+move+offset) - 14
else:
gem_position = i + move + offset
new_board[gem_position] += 1
gems_counter2 -= 1
if gems_counter2 == 0 and gem_position == 13: x_2 = 0
if gems_counter2==0 and gem_position in pits_of(1) and new_board[gem_position] == 1 and new_board[opposite_from(gem_position)] > 0:
gems_from_myside = new_board[gem_position]
gems_from_opside = new_board[opposite_from(gem_position)]
new_board[13] = gems_from_myside+gems_from_opside
new_board[gem_position] = 0
new_board[opposite_from(gem_position)] = 0
return (hasht[x_2],new_board)
# TODO: implement clear_pits(board)
# Return a new list representing the game state after clearing the pits from the board.
# When clearing pits, any gems in a player's pits get moved to that player's mancala.
# Check the pdf instructions for more detail about clearing pits.
def clear_pits(board: list) -> list:
length = len(board)
middle_index = length // 2
first_half = board[:middle_index]
second_half = board[middle_index:]
for i in range(6):
first_half[6] += first_half[i]
first_half[i]=0
second_half[6] += second_half[i]
second_half[i] = 0
return (first_half+second_half)
# This one is done for you.
# Plays a turn and clears pits if needed.
def perform_action(action, state):
player, board = state
new_player, new_board = play_turn(action, board)
if 0 in [len(valid_actions((0, new_board))), len(valid_actions((1, new_board)))]:
new_board = clear_pits(new_board)
return new_player, new_board
# TODO: implement score_in(state)
# state is a (player, board) tuple
# Return the score in the given state.
# The score is the number of gems in player 0's mancala, minus the number of gems in player 1's mancala.
def score_in(state: tuple) -> int:
lst = state[1]
return lst[6] - lst[13]
# TODO: implement is_tied(board)
# Return True if the game is tied in the given board state, False otherwise.
# A game is tied if both players have the same number of gems in their mancalas.
# You can assume all pits have already been cleared on the given board.
def is_tied(board: list) -> bool:
if board[mancala_of(0)] - board[mancala_of(1)] == 0: return True
else: return False
# TODO: implement winner_of(board)
# Return the winning player (either 0 or 1) in the given board state.
# The winner is the player with more gems in their mancala.
# You can assume it is not a tied game, and all pits have already been cleared.
def winner_of(board: list) -> int:
if board[mancala_of(0)] > board[mancala_of(1)]: return 0
elif board[mancala_of(0)] < board[mancala_of(1)]: return 1
# TODO: implement string_of(board)
def string_of(board: list) -> str:
new_board = pad_all(board)
return '\n {} {} {} {} {} {}\n {} {}\n {} {} {} {} {} {}\n'.format(new_board[12],new_board[11],new_board[10],new_board[9],new_board[8],new_board[7],new_board[13],new_board[6],new_board[0],new_board[1],new_board[2],new_board[3],new_board[4],new_board[5])
-----------------------MinMax AI-------------------------------------------------------------
from os import stat
import numpy as np
from mancala_helpers import *
# A simple evaluation function that simply uses the current score.
def simple_evaluate(state):
return score_in(state)
# TODO
# Implement a better evaluation function that outperforms the simple one.
def better_evaluate(state):
#lst = state[1]
#return score_in(state)/2
return None
# depth-limited minimax as covered in lecture
def minimax(state, max_depth, evaluate):
# returns chosen child state, utility
# base cases
if game_over(state): return None, score_in(state)
if max_depth == 0: return None, evaluate(state)
# recursive case
children = [perform_action(action, state) for action in valid_actions(state)]
results = [minimax(child, max_depth-1, evaluate) for child in children]
_, utilities = zip(*results)
player, board = state
if player == 0: action = np.argmax(utilities)
if player == 1: action = np.argmin(utilities)
return children[action], utilities[action]
# runs a competitive game between two AIs:
# better_evaluation (as player 0) vs simple_evaluation (as player 1)
def compete(max_depth, verbose=True):
state = initial_state()
while not game_over(state):
player, board = state
if verbose: print(string_of(board))
if verbose: print("--- %s's turn --->" % ["Better","Simple"][player])
state, _ = minimax(state, max_depth, [better_evaluate, simple_evaluate][player])
score = score_in(state)
player, board = state
if verbose:
print(string_of(board))
print("Final score: %d" % score)
return score
if __name__ == "__main__":
score = compete(max_depth=4, verbose=True)
----------------------------------------playing the game---------------------------
from os import stat
from mancala_helpers import *
from mancala_minimax import minimax, simple_evaluate
def get_user_action(state):
actions = list(map(str, valid_actions(state)))
player, board = state
prompt = "Player %d, choose an action (%s): " % (player, ",".join(actions))
while True:
action = input(prompt)
if action in actions: return int(action)
print("Invalid action, try again.")
if __name__ == "__main__":
max_depth = 1
state = initial_state()
while not game_over(state):
player, board = state
print(string_of(board))
if player == 0:
action = get_user_action(state)
state = perform_action(action, state)
else:
print("--- AI's turn --->")
#print(string_of(board))
print(state)
print(max_depth)
state, _ = minimax(state, max_depth, simple_evaluate)
#print(string_of(board))
player, board = state
print(string_of(board))
if is_tied(board):
print("Game over, it is tied.")
else:
winner = winner_of(board)
print("Game over, player %d wins." % winner)
the entire problem was in the mancala helper file. in the function play_turn() the 2nd line new_board = board. this was causing the issue because the original state should be immutable to work properly. any changes on new_board were also affecting board. the following new_board = copy.deepcopy(board) fixed everything. the function copy.deepcopy() creates a completely new copy, any changes applied to one of them does not affect the other.

Create a linked list function without creating another class

I am working on some practice exercises with linked lists and I got stuck with one function.
My program should create a Node class, take user input with create() function (number n and then takes in n number of elements), and has a function printLinkedList(p) to print it out. So far this works well but then I should create another function where I am going to be deleting the max element (if it occurs more than once, delete the first occurrence).
I found a function findMaxElement(p) that looks for the max, however, it doesn't work along my code (for example I get AttributeError: 'Node' object has no attribute 'head' error)
class Node:
def __init__(self, x = None):
self.data = x
self.next = None
def create():
n = int(input())
if n == 0:
return None
s = input().split()
p = Node(int(s[0]))
k = p
for i in range(1, n):
t = Node(int(s[i]))
k.next = t
k = t
return p
def printLinkedList(p):
if p == None:
print('Empty')
return
s = p
while s != None:
print(s.data, end = " ")
s = s.next
print()
def findMaxElement(p):
current = p.head
#Initializing max to initial node info
maximum = p.head.data
if(p.head == None):
print("List is empty")
else:
while(True):
#If current node's info is greater than max
#Then replace value of max with current node's info
if(maximum < current.info):
maximum = current.info
current= current.next
if(current == p.head):
break
return "Maximum value node in the list: "+ str(maximum)
#Driver code
a = create()
printLinkedList(a)
Input:
6
1 7 4 2 6 7
Expected result:
1 7 4 2 6 7
1 4 2 6 7
You could just define a findMaxElement() that traverses the linked-list in the same way that the printLinkedList() function is doing it (and finds the maximum value while doing so):
def findMaxElement(p):
if p == None:
return 'Empty List!'
current = p
maximum = p.data
while current != None: # Not end of list.
if current.data > maximum:
maximum = current.data
current = current.next
return "Maximum value node in the list: " + str(maximum)

Implement ultimate tic-tac-toe but winning in one sub board ends the game

I am trying to make an ultimate tic-tac-toe game in python which is a little different than the actual one in a way that this game ends when there is a win in any one sub-board. I am using minimax algorithm with alpha-beta pruning to find out the best move for the bot to play. The problem is that when i run the code and it is the time for bot to play its move, it runs endlessly without coming to a conclusion and returning a best_move.
The communication with the board is already handled. All i need is the best value and once i get that, i can retrieve the index from that state.
Initailly, once the game is started, the user is prompted to make a move from 1-9 which is then fed to the function:
Boards is a list of list which contains the state of each sub-board.
# choose a move to play
def play1(user_move):
# print_board(boards)
boards_list = main_boards.tolist()
player = 1
depth = 20
end_move = make_bot_move(boards_list, user_move, player, depth)
place(curr, end_move, 1)
return end_move
The make_bot_move function takes the position of the human and figures out in which sub-board it should play its best_move:
def make_bot_move(state, user_move, player, depth):
#sub_board = state[user_move]
# if suboptimal(state, user_move, player) != 0:
# return suboptimal(state, user_move, player)
pseudo_states = successors(state, player, user_move)
best_move = (-inf, None)
alpha = -inf
beta = inf
for x in pseudo_states:
state = x[0]
index = x[1]
val = minimax(state, index, opponent(player), depth-1, alpha, beta)
if val > best_move[0]:
best_move = (val, index)
# print("val = ", val)
# print_board(s[0])
return best_move[1]
The successors function returns the possible states where it can play its move:
def successors(boards, player, user_move):
sub_board = boards[user_move]
value_index = []
possible_states = []
for idx, value in enumerate(sub_board):
if value == 0 and idx != 0:
value_index.append(idx)
copied_board = deepcopy(boards)
possible_states.append(get_possible_state(copied_board, user_move, idx, player))
#print(possible_states)
return zip(possible_states, value_index)
Finally, every possible move is fed to minimax function which returns a val of the best move:
def minimax(state, last_move, player, depth, alpha, beta):
if depth <= 0 or get_game_status(state, player) != 0:
return evaluate(state, opponent(player))
if player == 1:
max_eval = -inf
pseudo_states = successors(state, player, last_move)
for x in pseudo_states:
state = x[0]
index = x[1]
print(depth)
#print_board(np.array(state))
eval = minimax(state, index, opponent(player), depth-1, alpha, beta)
max_eval = max(max_eval, eval)
alpha = max(alpha, eval)
if beta<= alpha:
break
#print_board(np.array(state))
return max_eval
if player == 2:
min_eval = inf
pseudo_states = successors(state, player, last_move)
for x in pseudo_states:
state = x[0]
index = x[1]
print(depth)
#print_board(np.array(state))
eval = minimax(state, index, opponent(player), depth - 1, alpha, beta)
min_eval = min(min_eval, eval)
beta = min(beta, eval)
if beta<= alpha:
break
#print_board(np.array(state))
return min_eval
To know whether someone has WON || LOSS || DRAW, get_game_status function is called inside minimax function:
def get_game_status(state, player):
other_player = opponent(player)
for each_box in state[1:10]:
win_state = [
[each_box[1], each_box[2], each_box[3]],
[each_box[4], each_box[5], each_box[6]],
[each_box[7], each_box[8], each_box[9]],
[each_box[1], each_box[4], each_box[7]],
[each_box[2], each_box[5], each_box[8]],
[each_box[3], each_box[6], each_box[9]],
[each_box[1], each_box[5], each_box[9]],
[each_box[3], each_box[5], each_box[7]],
]
if [player, player, player] in win_state:
return player
elif [other_player, other_player, other_player] in win_state:
return other_player
else:
return 0
And the scoring is handled using evaluate function:
def evaluate(state, player):
if(get_game_status(state, player) and player ==1) :
score = 10
elif(get_game_status(state, player) and player == 2):
score = -10
else:
score = 0
return score
The expected result is to get the best move but instead, it runs endlessly.
Kindly suggest what changes I should make, or where I am going wrong.

Alpha-beta pruning in python

I'm trying to implement a computer player in a Connect Four type game. Alpha-beta pruning seemed like the best way to achieve this, but I cannot seem to figure out what I'm doing wrong.
The following is the code I've come up with. It starts with a initial root state. For every possible, valid move (and if no pruning occurs) the algorithm: makes a deep copy of the state, updates the state (increases depth, switches turns, adds a piece, sets a heuristic value), and adds this new state to the root's list of successors.
If the new state is not a leaf (i.e. at max depth) it recursively continues. If it is a leaf, the algorithm checks the root's value and appropriate local alpha/beta value and updates accordingly. After all possible valid options have been checked, the algorithm returns the appropriate local alpha/beta value.
At least, that is what I intended. Every run returns a value of 0. As requested here is the initialization code:
class GameState:
def __init__(self, parentState = None):
# copy constructor
if not(parentState == None):
self.matrix = copy.deepcopy(parentState.matrix)
self.successor = copy.deepcopy(parentState.successor)
self.depth = parentState.depth
self.turn = parentState.turn
self.alpha = parentState.alpha
self.beta = parentState.beta
self.connects = copy.deepcopy(parentState.connects)
self.value = parentState.value
self.algo_value = parentState.value
self.solution = parentState.solution
# new instance
else:
# empty board
self.matrix = [[0 for y in xrange(6)] for x in xrange(7)]
## USED WHEN GROWING TREE
self.successor = [] # empty list
self.depth = 0 # start at root
self.turn = 1 # game starts on user's turn
## USED WHEN SEARCHING FOR SOLUTION
self.alpha = float("-inf")
self.beta = float("+inf")
self.connects = [0, 0, 0] # connects in state
self.algo_value = float("-inf")
self.value = 0 # alpha-beta value of connects
self.solution = False # connect four
def alphabeta(root):
if root.depth < MAX_EXPANSION_DEPTH:
# pass down alpha/beta
alpha = root.alpha
beta = root.beta
# for each possible move
for x in range(7):
# ALPHA-BETA PRUNING
# if root is MAXIMIZER
if (root.turn == 2) and (root.algo_value > beta): print "beta prune"
# if root is MINIMIZER
elif (root.turn == 1) and (root.algo_value < alpha): print "alpha prune"
# CANNOT prune
else:
# if move legal
if (checkMove(root, x)):
# CREATE NEW STATE
root.successor.append(GameState(root))
working_state = root.successor[-1]
# update state
working_state.successor = []
working_state.depth += 1
working_state.turn = (working_state.turn % 2) + 1
cons = dropPiece(working_state, x, working_state.turn)
# update state values
# MAXIMIZER
if working_state.turn == 2:
working_state.value = ((cons[0]*TWO_VAL)+(cons[1]*THREE_VAL)+(cons[2]*FOUR_VAL)) + root.value
working_state.algo_value = float("-inf")
# MINIMIZER
else:
working_state.value = ((-1)*((cons[0]*TWO_VAL)+(cons[1]*THREE_VAL)+(cons[2]*FOUR_VAL))) + root.value
working_state.algo_value = float("inf")
# if NOT a leaf node
if (working_state.depth < MAX_EXPANSION_DEPTH):
# update alpha/beta values
working_state.alpha = alpha
working_state.beta = beta
ret = alphabeta(working_state)
# if MAXIMIZER
if (root.turn == 2):
if (ret > root.algo_value): root.algo_value = ret
if (ret > alpha): alpha = ret
# if MINIMIZER
else:
if (ret < root.algo_value): root.algo_value = ret
if (ret < beta): beta = ret
# if leaf, return value
else:
if root.turn == 2:
if (working_state.value > root.algo_value): root.algo_value = working_state.value
if working_state.value > alpha: alpha = working_state.value
else:
if (working_state.value < root.algo_value): root.algo_value = working_state.value
if working_state.value < beta: beta = working_state.value
if root.turn == 2: return alpha
else: return beta
Solved the issue. In the above algorithm, I check for pruning after the loop has moved on to the next successor (whose default algo_values are the respective max and min values).
Instead, the algorithm should check the first node in each list of successors, update its algo_value, and THEN check for pruning of the rest of the nodes in the list of successors.

Categories