How to write minimax algorithm accurately? - python

I've made MiniMax function but it seems it doesn't work,
It should return the optimal play in form [1,2] for example,
def minimax(board,player_to_play):
"""
Returns the optimal action for the current player on the board.
"""
available = actions(board) ##empty slots
to_move = None
if player_to_play == "X": #maxmizing
current_best = -100
for play in available:
temp_board = copy.deepcopy(board)
temp_board[play[0]][play[1]] = "X"
o_move = minimax(temp_board,"O")
score = utility(o_move)
if score > current_best :
to_move = play
else: #minmizing
current_best = 100
for play in available:
temp_board = copy.deepcopy(board)
temp_board[play[0]][play[1]] = "O"
x_move = minimax(temp_board,"X")
score = utility(x_move)
if score < current_best:
to_move = play
return to_move
where utility() return score of board
def utility(board):
"""
Returns 1 if X has won the game, -1 if O has won, 0 otherwise.
"""
if terminal(board):
if winner(board) == "X":
return 1
elif winner(board) == "O":
return -1
else:
return 0
else:
return 0
Is that logically right or what I'm miss understanding?

Related

Created an endless loop, but can't tell why it is endless

So I am creating a card game in python using classes. I got it all set up to the point where it should work. But when I ran it it just simply never stops running. I am not really sure how to make this code minimal and reproduceable, because I do not know where the issue is at.
Here is the code i have written.
It has to be in the play_uno() class object.
""" UNO Simulator """
import random
class card():
def __init__(self, value, color, wild):
'''
An UNO deck consists of 108 cards, of which there are 76 Number cards,
24 Action cards and 8 Wild cards. UNO cards have four color "suits",
which are red, yellow, blue and green.
'''
self.card_nums = ['0','1','2','3','4','5','6','7','8','9']
self.card_colors = ['red','blue','green','yellow']
self.spec_cards = ['draw2','reverse','skip']
self.wild_cards = ['wild','wild_draw_4']
if wild == False:
self.value = self.card_nums[value]
self.color = self.card_colors[color]
elif wild == 'special':
self.value = self.spec_cards[value]
self.color = self.card_colors[color]
elif wild == True:
self.value = self.wild_cards[value]
self.color = None
class generate_deck():
def __init__(self):
self.number_cards = self.get_num_cards()
self.special_cards = self.get_special_cards()
self.wild_cards = self.get_wild_cards()
self.cards = self.number_cards + self.special_cards + self.wild_cards
random.shuffle(self.cards)
def get_num_cards(self):
# only one zero per color
with_zeroes = [card(i,j,False) for i in range(10) for j in range(4)]
no_zeroes = [card(i,j,False) for i in range(1,10) for j in range(4)]
return no_zeroes + with_zeroes
def get_wild_cards(self):
wild_draw4s = [card(i,None,True) for i in range(2) for x in range(2)]
wilds = [card(i,None,True) for i in range(2) for x in range(2)]
return wilds + wild_draw4s
def get_special_cards(self):
return [card(i,j,'special') for i in range(3) for j in range(4) for x in range(2)]
class player():
def __init__(self, name):
self.wins = 0
self.name = name
self.cheater = False
self.cards = ''
self.turn = 0
self.uno = 0
class play_uno():
def __init__(self, num_players = 3, num_cheaters = 0, cards_per_player = 5):
# get started
self.rules = 'default'
self.status = 'ongoing'
self.deck = generate_deck().cards
self.played_cards = []
self.dro = 0
self.direction = 0
self.top_card = self.deck.pop() # random card as first card to play on
self.tot_turns = 0
# generate players, make cheaters later
self.players = [player('player' + str(i)) for i in range(num_players + num_cheaters)]
# give each player 7 cards to start
for _player_ in self.players:
_player_.cards = [self.draw_card() for i in range(cards_per_player)]
# start playing turns in order
# do not know how to reverse yet
"""
Right now it is endless for some reason.
"""
while self.status == 'ongoing':
for _player in self.players:
self.turn(_player)
def draw_card(self):
# draws random card from deck instead of card on top
if len(self.deck) == 0:
self.re_shuffle()
self.dro += 1
return self.deck.pop()
def re_shuffle(self):
self.deck = self.played_cards
random.shuffle(self.deck)
self.played_cards = []
return self.deck, self.played_cards
def play_card(self, player_cards, _card):
self.top_card = _card
return player_cards.remove(_card), self.played_cards.append(_card), self.top_card
def game_over(self, winner):
winner.wins += 1
self.game_status = 'over'
def turn(self, _player):
played = False
# check if someone played wild card last turn
if self.top_card.value in card(1,2,None).wild_cards:
self.top_card.color = random.choice(card.card_colors)
if self.top_card.value == 'wild_draw_4':
_player.cards += [self.draw_card() for i in range(4)]
self.tot_turns += 1
return _player
# check for special cards
elif self.top_card.value in card(1,2,None).spec_cards:
if self.top_card.value == 'draw2':
_player.cards += [self.draw_card() for i in range(4)]
self.tot_turns += 1
return _player
# for now we are treating reverse cards like skips
elif self.top_card.value == 'reverse' or self.top_card.value == 'skip':
played = True
self.tot_turns += 1
return _player
# If its a normal card, or regular wild
if played == False:
for _card in _player.cards:
if _card.color == self.top_card.color:
self.play_card(_player.cards, _card)
played = True
break
elif _card.value == self.top_card.value:
self.play_card(_player.cards, _card)
played = True
break
# if the player cannot play at all
# rn they just move on if they have to draw,
# cant play the card they just drew.
if played == False:
_player.cards += [self.draw_card()]
played = True
self.tot_turns += 1
# check if the player won or not
if len(_player.cards) == 0:
self.game_over(_player)
elif len(_player.cards) == 1:
_player.uno += 1
return _player.cards
In the function turn in the play_uno class you are checking for certain wild/special cards. If the value is reverse, for example, the function hits a return _player line which ends the execution of the function and the player is unable to play another card.
Move the return statements to the end of the function if you want to ensure the rest of the code is run.
I did not run the code, but I think you should test a player's number of cards before drawing again. Like this:
if len(_player.cards) == 0:
self.game_over(_player)
elif len(_player.cards) == 1:
_player.uno += 1
if played == False:
_player.cards += [self.draw_card()]
played = True
self.tot_turns += 1
Or are the rules of this game different? I sincerely don't remember them anymore.
The while loop at the end of play_uno's __init__ checks for status, which never changes.
The loop calls turn on each of players every iteration. You must change status somewhere or you must put an if ...: break in the while loop (e.g. if not _player.cards).
EDIT: It appears that you meant self.status instead of self.game_status, in game_over. Try changing game_status to status there.

Trouble with minimax algorithm in tic tac toe game with adjustable game size

I am trying to create a tic tac toe game with an adjustable game size and a computer that uses the minimax algorithm. The game sizes can only be odd numbers, to make sure that diagonal wins are always possible. The game runs with no errors, but I know that the minimax algorithm isn't working 100% correct because I can still beat the computer. I've looked extensively over my code and cannot find where the algorithm is going wrong. Here is my code:
Main.py
import TicTacToe
import Minimax
if (__name__ == "__main__"):
t = TicTacToe.ttt(3)
m = Minimax.Minimax(3, t)
while (t.winner == None):
if (t.turn == 1):
playerInputI = int(input("Input row: "))
playerInputJ = int(input("Input column: "))
bestIndex = (playerInputI, playerInputJ)
else:
winner, bestIndex = m.minimax(t.grid, (-1, -1), 15, -1)
t.winner = None
t.findWinner(bestIndex, t.grid)
t.updateGameGrid(bestIndex)
print(t.grid)
print(t.grid)
Minimax.py
class Minimax:
def __init__(self, gs, t):
self.gridSize = gs
self.ttt = t
def minimax(self, state, currIndex, depth, turn):
if (currIndex[0] != -1 and currIndex[1] != -1):
winner = self.ttt.findWinner(currIndex, state)
if (winner == -1):
return winner - depth, currIndex
elif (winner == -1):
return winner + depth, currIndex
elif (winner == 0):
return 0, currIndex
if (depth==0 and winner==None):
return 0, currIndex
evalLimit = -turn * 1000
bestIndex = None
for i in range(self.gridSize):
for j in range(self.gridSize):
if (state[i][j] == 0):
state[i][j] = turn
eval, newIndex = self.minimax(state, (i, j), depth-1, -turn)
state[i][j] = 0
if (turn > 0 and eval > evalLimit):
bestIndex = newIndex
evalLimit = eval
elif (turn < 0 and eval < evalLimit):
bestIndex = newIndex
evalLimit = eval
return evalLimit, bestIndex
Tictactoe.py
from random import randint
class ttt:
def __init__(self, size):
self.gridSize = size
self.grid = self.createGrid()
# If using minimax algorithm, user is maximizer(1) and computer is minimizer(-1)
# If single player, then user is 1, computer is -1
# If multiplayer, user1 is 1, user2 = -1
self.turn = 1
self.winner = None
def createGrid(self):
grid = []
for i in range(self.gridSize):
grid.append([])
for j in range(self.gridSize):
grid[i].append(0)
# grid = [[-1, 1, 0], [0, -1, 0], [0, 0, 0]]
return grid
def updateGameGrid(self, index):
if (self.grid[index[0]][index[1]] != 0):
return
self.grid[index[0]][index[1]] = self.turn
winner = self.findWinner(index, self.grid)
self.turn = -self.turn
def randomIndex(self):
x = randint(0, self.gridSize-1)
y = randint(0, self.gridSize-1)
while (self.grid[x][y] != 0):
x = randint(0, self.gridSize-1)
y = randint(0, self.gridSize-1)
return (x, y)
def findWinner(self, index, grid):
# Row
found = True
for j in range(self.gridSize-1):
if (grid[index[0]][j] != grid[index[0]][j+1] or grid[index[0]][j] == 0):
found = False
break
if (found):
self.winner = self.turn
return self.turn
# Column
found = True
for i in range(self.gridSize-1):
if (grid[i][index[1]] != grid[i+1][index[1]] or grid[i][index[1]] == 0):
found = False
break
if (found):
self.winner = self.turn
return self.turn
# Top Left to Bottom Right Diagonal
if (index[0] == index[1]):
found = True
for i in range(self.gridSize-1):
if (grid[i][i] != grid[i+1][i+1] or grid[i][i] == 0):
found = False
break
if (found):
self.winner = self.turn
return self.turn
# Top Right to Bottom Left Diagonal
if (index[0] + index[1] == self.gridSize-1):
found = True
for i in range(self.gridSize-1):
if (grid[self.gridSize-i-1][i] != grid[self.gridSize-i-2][i+1] or grid[self.gridSize-i-1][i] == 0):
found = False
break
if (found):
self.winner = self.turn
return self.turn
tie = True
for i in range(self.gridSize):
for j in range(self.gridSize):
if (grid[i][j] == 0):
tie = False
if (tie):
self.winner = 0
return 0
return None
The grid is represented as a 2d array, with each element being either a -1 for O, 1 for X and 0 for nothing. The player is 1 and the computer is -1. Who's turn it is is represented as a -1 or 1, corresponding to O or X. If anyone is able to find where the error in my code is that would be a great.
I see two troubles :
a) the two following conditions are the same in minimax
if (winner == -1):
return winner - depth, currIndex
elif (winner == -1):
return winner + depth, currIndex
b) When you return bestIndex, you actually return the winning move, the one that completes a line or a row or a diagonal, at one of the leaves of the game tree. What you really want is the next move to play. Write instead bestIndex = (i, j) in the condition if eval > evalLimit
I didn't check for everything but that is a good start. Run your code at depth=1 or 2 with many printf inside the minimax function, and look at the different moves, with the corresponding score, if they look correct or not.

Minimax logic works slow

I'm programming Tic-Tac-Toe using minimax algorithm with alpha-beta pruning after taking Harvard's CS-50.
My logic is working perfectly, except for the return time of minimax (especially on the first call, when only one player played). I was thinking of replacing the extra recursive call of minimax to max/min value. I want to emphasize that minimax is taking relatively slow for my purposes (around 0.5 seconds).
import math
import copy
import time
X = "X"
O = "O"
EMPTY = None
def initial_state():
"""
Returns starting state of the board.
"""
return [[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY]]
def player(board):
"""
Returns player who has the next turn on a board.
"""
if board == initial_state():
return X
X_counter = 0
O_counter = 0
for row in board:
for cell in row:
if cell == X:
X_counter += 1
elif cell == O:
O_counter += 1
return O if X_counter > O_counter else X
def actions(board):
"""
Returns set of all possible actions (i, j) available on the board.
"""
possible_moves = set()
for i in range(3):
for j in range(3):
if board[i][j] is None:
possible_moves.add((i, j))
return possible_moves
def result(board, action):
"""
Returns the board that results from making move (i, j) on the board.
"""
newboard = copy.deepcopy(board)
newboard[action[0]][action[1]] = player(board)
return newboard
def winner(board):
"""
Returns the winner of the game, if there is one.
"""
if board[0][0] == board[0][1] == board[0][2] != None: # 1, 2, 3
return board[0][0]
if board[1][0] == board[1][1] == board[1][2] != None: # 4, 5, 6
return board[1][0]
if board[2][0] == board[2][1] == board[2][2] != None: # 7, 8, 9
return board[2][0]
if board[0][0] == board[1][0] == board[2][0] != None: # 1, 4, 7
return board[0][0]
if board[0][1] == board[1][1] == board[2][1] != None: # 2, 5, 8
return board[0][1]
if board[0][2] == board[1][2] == board[2][2] != None: # 3, 6, 9
return board[0][2]
if board[0][0] == board[1][1] == board[2][2] != None: # 1, 4, 7
return board[0][0]
if board[0][2] == board[1][1] == board[2][0] != None: # 3, 5, 7
return board[0][2]
return None
def terminal(board):
"""
Returns True if game is over, False otherwise.
"""
def check_draw(board):
for row in board:
for cell in row:
if cell is None:
return False
return True
if winner(board) is not None or check_draw(board): # or someone won , or there is a draw
return True
return False
def utility(board):
"""
Returns 1 if X has won the game, -1 if O has won, 0 otherwise.
"""
temp = winner(board)
if temp == X:
return 1
elif temp == O:
return -1
return 0
def minimax(board):
"""
Returns the optimal action for the current player on the board.
"""
options = actions(board)
if player(board) == X:
vT = -math.inf
move = set()
for action in options:
v, count = minvalue(result(board,action), -math.inf, math.inf, 0)
if v > vT:
vT = v
move = action
else:
vT = math.inf
move = set()
for action in options:
v, count = maxvalue(result(board,action), -math.inf, math.inf, 0)
if v < vT:
vT = v
move = action
return move
def maxvalue(board,alpha,beta,count):
"""
Calculates the max value of a given board recursively together with minvalue
"""
if terminal(board): return utility(board), count+1
v = -math.inf
posactions = actions(board)
for action in posactions:
vret, count = minvalue(result(board, action),alpha,beta,count)
v = max(v, vret)
alpha = max(alpha, v)
if alpha > beta:
break
return v, count+1
def minvalue(board,alpha,beta,count):
"""
Calculates the min value of a given board recursively together with maxvalue
"""
if terminal(board): return utility(board), count+1
v = math.inf
posactions = actions(board)
for action in posactions:
vret, count = maxvalue(result(board, action),alpha,beta,count)
v = min(v, vret)
beta = min(v, beta)
if alpha > beta:
break
return v, count + 1
Rather than:
def player(board):
"""
Returns player who has the next turn on a board.
"""
if board == initial_state():
return X
X_counter = 0
O_counter = 0
for row in board:
for cell in row:
if cell == X:
X_counter += 1
elif cell == O:
O_counter += 1
return O if X_counter > O_counter else X
What kind of performance do you get with:
def player(board):
"""
Returns player who has the next turn on a board.
"""
counts = collections.Counter(cell for row in board for cell in row if cell)
return O if counts.get(X, 0) > counts.get(O, 0) else X
A question might arise as to why improving the performance of player might make a big difference.
To calculate the first move, I counted the number of times each method was called:
[('winner', 53673), ('player', 30442), ('result', 30441), ('terminal', 30441), ('utility', 23232), ('maxvalue', 16248), ('actions', 14419), ('minvalue', 14193), ('minimax', 1)]
We can see that player() is called very often. The next candidate to look at is clearly winner() as it is called even more frequently than player(). We can observe that winner() is only ever called by terminal() and utility(). Further, we can observe that terminal() and utility() are only ever called together like:
if terminal(board): return utility(board), count+1
So let's see if we can do something about combining them.
How about:
def terminal(board):
"""
Returns True if game is over, False otherwise.
"""
is_winner = winner(board)
if is_winner == X:
return 1
if is_winner == O:
return -1
if not actions(board):
return 0
return None
and we can use it like:
who_wins = terminal(board)
if who_wins is not None: return who_wins, count+1
This takes the heat off of winner():
[('player', 30442), ('result', 30441), ('terminal', 30441), ('winner', 30441), ('maxvalue', 16248), ('actions', 14419), ('minvalue', 14193), ('minimax', 1)]
This takes an addition 25% off the already reduced time.
On my system, the original code takes (on average) 2.5 seconds for a first move. With these simple updates, the same recommended move is generated in 0.001 seconds on average.

My Tic Tac Toe with use of the minimax algorithm is not working

My tic-tac-toe doesn't seem to work properly. I have tried various things, but nothing changes.
You can run the script yourself, just to see that every time after asking the player's move the game makes a vertical line of X's at the desired column and ends there.
It's probably a problem with my implementation of minimax or the computedMove function, although i cannot locate any errors in there.
# Boardsize initialization
boardSize = 0
# Board initialization
board = []
person = 'X'
ai = 'O'
#This variable indicates the player who has their turn at the moment.
currentPlayer = ''
# This shows the board.
for n in range (boardSize):
for m in range (boardSize):
print (" - "),
print ("\n")
#Checking if somebody won (only horizontal or vertical)
def winLine(line, letter):
return all(n == letter for n in line)
#New list from diagonals
def winDiagonal(board):
return (board[n][n] for n in range (boardSize))
#The function universally checks whether somebody has won, or not.
def checkWinner (board):
#Liczenie wolnych pol
openSpots = 0
for n in range(boardSize):
for m in range(boardSize):
if board[n][m] == '0':
openSpots += 1
#Transposition of the board, so it's possible to use winline() here
for letter in (person, ai):
transPos = list(zip(*board))
#Horizontal check
if any(winLine(row, letter) for row in board):
return letter
#Vertical check
elif any (winLine(col, letter) for col in transPos):
return letter
#Diagonal check
elif any (winLine(winDiagonal(dummy), letter) for dummy in (board, transPos)):
return letter
elif openSpots == 0: return 'tie'
else: return 'N/A'
#This function returns the player's move
def playerMove (row, col):
#Checking if the field is clear
if board[row][col] == '0':
board[row-1][col-1] = person
else:
print('You cannot make that move.')
#Minimax constants
plusInf = float('inf')
minusInf = float('-inf')
#Lookup table for minimax scores
scores = {
'X': 10,
'O': -10,
'None': 0
}
#Minimax itself
def minimax(baord, depth, maximizes):
#Checking whether anybody has won
res = checkWinner(board)
if (res != 'N/A'):
return scores[res]
#Maximizing player
if maximizes:
minmaxBoard = board.copy()
maxTarget = minusInf
for n in range(boardSize):
for m in range(boardSize):
if minmaxBoard[n][m] == '0':
minmaxBoard[n][m] = ai
score = minimax(minmaxBoard, depth + 1, False)
maxTarget = max(score, maxTarget)
return maxTarget
#Minimizing player
else:
minTarget = plusInf
minmaxBoard = board.copy()
for n in range(boardSize):
for m in range(boardSize):
if minmaxBoard[n][m] == '0':
minmaxBoard[n][m] = person
score = minimax(minmaxBoard, depth + 1, True)
minTarget = min(score, minTarget)
return minTarget
#The computer uses this function to make its move
def computedMove():
computedTarget = minusInf
for n in range(boardSize):
for m in range(boardSize):
newBoard = board.copy()
if newBoard[n][m] == '0':
newBoard[n][m] = ai
score = minimax(newBoard, 0, False)
if score > computedTarget:
computedTarget = score
move = (n,m)
board[move[0]][move[1]] = ai
# Getting input for the player's move
def getPlayerMove():
res = input('Please type in your move on the form \"x y\", x being the number of the column and y the number of the row of your choosing.\n')
col, row = res.split(" ")
row = int(row)
col = int(col)
move = (row, col)
return move
# Drawing the board
def drawBoard():
for n in range(boardSize):
for m in range(boardSize):
if board[n][m] == '0':
print(' - ', end='')
else:
print(' '+board[n][m]+' ', end='')
print('\n')
# Current state of the game, False at first
playing = False
#The game loop
while True:
currentPlayer = person
boardSize = int(input("Please enter the size of the board. (one sie)\n"))
board = [['0']*boardSize]*boardSize
print("You go first.")
playing = True
while playing:
if currentPlayer == person:
drawBoard()
move = getPlayerMove()
playerMove(move[0]-1, move[1]-1)
if checkWinner(board) == person:
drawBoard()
print("Yaay, you won!")
playing = False
else:
if checkWinner(board) == 'tie':
drawBoard()
print('It\'s a tie!')
break
else:
currentPlayer = ai
if currentPlayer == ai:
computedMove()
if checkWinner(board) == ai:
drawBoard()
print('You lose!')
playing = False
else:
if checkWinner(board) == 'tie':
drawBoard()
print('It\'s a tie!')
break
else:
currentPlayer = person
if not input('Do you want to play again?').lower().startswith('y'):
break
Check out this statement:
board = [['0']*boardSize]*boardSize
You're essentially creating a list of references to the same list boardSize times. That's why when you're assigning something to board[i][j] element, it gets assigned to j-th elements of all rows (from board[0] to board[len(board)]), because all rows are referencing the same list.
Use this instead:
board = [['0'] * boardSize for _ in range(boardSize)]
There are other issues with this code though. I'm sure you're decrementing your (x, y) indexes multiple times, for example. I didn't check it further.

Minimax python returns a win when it won't win

I am making a minimax in python, using 3 functions: max, min, and minimax. It seems to be working, but every win condition starts with ['X','X','X',....]
(... being any other X & O combination)
Why does this happen?
Program Parts:
Minimax
def minimax(game_state):
moves = game_state.get_possible_moves()
print('possible moves are:'+str(moves))
best_move = None
best_score = -2
for move in moves:
clone = game_state.get_next_state(move, True)
score = max_play(clone)
if score > best_score:
best_move = move
best_score = score
return ['best move'+str(best_move), 'best score'+str(best_score)]
Max_play
def max_play(game_state):
if game_state.is_gameover():
return evals(game_state)
moves = game_state.get_possible_moves()
best_score = -2 #not possible
best_move = None
for move in moves:
clone = game_state.get_next_state(move, True)
score = min_play(clone)
if score > best_score:
best_move = move
best_score = score
Min_play
def min_play(game_state):
if game_state.is_gameover():
return evals(game_state)
moves = game_state.get_possible_moves()
best_move = None
best_score = 2 #not possible
for move in moves:
clone = game_state.get_next_state(move, False)
score = max_play(clone)
if score < best_score:
best_move = move
best_score = score
return best_score
Class and Evaluation
class GameState:
def __init__(self,board):
self.board = board
self.character = 'X'
self.winning_combos = [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,8]]
def is_gameover(self):
if self.board.count('X') + self.board.count('O') == 9:
return True
for combo in self.winning_combos:
if (self.board[combo[0]] == 'X' and self.board[combo[1]] == 'X' and self.board[combo[2]] == 'X') or (self.board[combo[0]] == 'O' and self.board[
return True
return False
def get_possible_moves(self):
squares = []
for square in self.board:
if square != 'X' and square != 'O':
squares.append(int(square))
return squares
def get_next_state(self, move, computer_turn):
copy = list(self.board)
if computer_turn:
copy[move] = 'X'
else:
copy[move] = 'O'
return GameState(copy)
def evals(game_state):
for combo in [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,8]]:
if game_state.board[combo[0]] == 'X' and game_state.board[combo[1]] == 'X' and game_state.board[combo[2]] == 'X':
print('WIN',game_state.board)
return 1
elif game_state.board[combo[0]] == 'O' and game_state.board[combo[1]] == 'O' and game_state.board[combo[2]] == 'O':
print('LOSS',game_state.board)
return -1
else:
return 0

Categories