How do I implement a minimax function in a Tic-Tac-Toe game in Python? [duplicate]

How do I implement a minimax function in a Tic-Tac-Toe game in Python? [duplicate] - python

I am trying to make a Tic-Tac-Toe game in Python using PyGame and the MiniMax algorithm. The AI plays really well when given the first chance (playing as 'X'), but becomes dumb enough to help the user win when not given the first chance (playing as 'O'). I think I know what the problem is but changing it is messing with the whole program and is not going by the given docstrings.
I've made two python files - one for the GUI (runner.py) and the other for the logic behind the game and the AI (tictactoe.py).
This is the logic behind the game:
# Import module `copy` for function `deepcopy` to deeply copy an
# original (mutable) object to save the object from mutations
import copy
X = 'X'
O = 'O'
EMPTY = None
def initial_state():
"""Returns starting state of the board
"""
return [
[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY]
]
def display(board, autoprint=False):
"""Displays the board nested list as
a 3x3 matrix for board visualization
"""
vis_board = ''
for row in board:
for playr in row:
if playr is None:
playr = ' '
playr += ' '
vis_board += playr
vis_board += '\n'
if autoprint:
print(vis_board)
return vis_board
def player(board):
"""Returns player who has the next turn on a board
"""
global X, O
# Initial values for every call of the function
X_count = 0
O_count = 0
for row in board:
for playr in row:
if playr == X:
X_count += 1
elif playr == O:
O_count += 1
# `X` always starts first
if O_count < X_count:
return O
return X
def actions(board):
"""Returns set of all possible actions
(i, j) available on the board
"""
global EMPTY
action_set = set()
for i, row in enumerate(board):
for j, playr in enumerate(row):
if playr is EMPTY:
action_set.add((i, j))
return action_set
def result(board, action):
"""Returns the board that results from
making move (i, j) on the board.
"""
global EMPTY
if type(action) is not tuple or len(action) != 2:
raise Exception('invalid action taken')
# Using `deepcopy` to make a deepcopy of *board*
# as duplication by slicing entire list and by
# type conversion is not working poperly
dup_board = copy.deepcopy(board)
# Unpack the coordinates as `I` and `J`
I, J = action
# Check if place has not already been used
if dup_board[I][J] is EMPTY:
dup_board[I][J] = player(dup_board)
else:
raise Exception('invalid action taken')
return dup_board
def is_full(board):
"""Returns True if all places have been occupied, else returns False
"""
global EMPTY
for row in board:
for playr in row:
if playr is EMPTY:
return False
return True
def winner(board):
"""Returns the winner of the game, if there is one.
"""
winr = None # Initial declaration to avoid errors if no winner found
# Check diagonally
if (board[1][1] == board[0][0] and board[0][0] == board[2][2])\
or (board[1][1] == board[0][2] and board[0][2] == board[2][0]):
winr = board[1][1]
return winr
for i in range(3):
# Check each row for three-in-a-row
if board[i][0] == board[i][1] and board[i][1] == board[i][2]:
winr = board[i][1]
break
# Check each column for three-in-a-column
elif board[0][i] == board[1][i] and board[1][i] == board[2][i]:
winr = board[1][i]
break
return winr
def terminal(board):
"""Returns True if game is over, False otherwise.
"""
if winner(board) is None and not is_full(board):
return False
return True
def utility(board):
"""Returns 1 if X has won the game, -1 if O has won, 0 otherwise.
"""
global X, O
if terminal(board):
winr = winner(board)
if winr == X:
util = 1
elif winr == O:
util = -1
else:
util = 0
return util
return None
def get_best_score(board, is_max_turn):
"""Returns the best value of values of all possible moves
"""
if utility(board) is not None:
return utility(board)
scores = []
# Recursively help `minimax` choose the best action
# in `actions` of *board* by returning the best value
for action in actions(board):
rslt = result(board, action)
scores.append(get_best_score(rslt, not is_max_turn))
return max(scores) if is_max_turn else min(scores)
def minimax(board):
"""Returns the optimal action for the current player on the board.
"""
if terminal(board):
return None
best_score = -float('inf') # Least possible score
best_action = None
for action in actions(board):
rslt = result(board, action)
score = get_best_score(rslt, False)
if score > best_score:
best_score = score
best_action = action
return best_action
The GUI code file:
# Import module `PyGame` for a GUI
import pygame
import sys
import time
# Import module `tictactoe` (from the same folder as
# this file `__file__`) for the logic of the game's AI
import tictactoe as ttt
pygame.init()
size = width, height = 600, 400
# Colors
black = (0, 0, 0)
white = (255, 255, 255)
screen = pygame.display.set_mode(size)
mediumFont = pygame.font.Font('OpenSans-Regular.ttf', 24)
largeFont = pygame.font.Font('OpenSans-Regular.ttf', 40)
moveFont = pygame.font.Font('OpenSans-Regular.ttf', 60)
user = None
board = ttt.initial_state()
ai_turn = False
while True:
for event in pygame.event.get():
if event.type == pygame.QUIT:
sys.exit()
screen.fill(black)
# Let user choose a player.
if user is None:
# Draw title
title = largeFont.render('Play Tic-Tac-Toe', True, white)
titleRect = title.get_rect()
titleRect.center = (round(width/2), 50)
screen.blit(title, titleRect)
# Draw buttons
playXButton = pygame.Rect(round(width/8), round(height/2), round(width/4), 50)
playX = mediumFont.render('Play as X', True, black)
playXRect = playX.get_rect()
playXRect.center = playXButton.center
pygame.draw.rect(screen, white, playXButton)
screen.blit(playX, playXRect)
playOButton = pygame.Rect(5*round(width/8), round(height/2), round(width/4), 50)
playO = mediumFont.render('Play as O', True, black)
playORect = playO.get_rect()
playORect.center = playOButton.center
pygame.draw.rect(screen, white, playOButton)
screen.blit(playO, playORect)
# Check if button is clicked
click, _, _ = pygame.mouse.get_pressed()
if click == 1:
mouse = pygame.mouse.get_pos()
time.sleep(0.5)
if playXButton.collidepoint(mouse):
user = ttt.X
elif playOButton.collidepoint(mouse):
user = ttt.O
else:
# Draw game board
tile_size = 80
tile_origin = (width / 2 - (1.5 * tile_size),
height / 2 - (1.5 * tile_size))
tiles = []
for i in range(3):
row = []
for j in range(3):
rect = pygame.Rect(
round(tile_origin[0]+j*tile_size),
round(tile_origin[1]+i*tile_size),
round(tile_size), round(tile_size)
)
pygame.draw.rect(screen, white, rect, 3)
if board[i][j] != ttt.EMPTY:
move = moveFont.render(board[i][j], True, white)
moveRect = move.get_rect()
moveRect.center = rect.center
screen.blit(move, moveRect)
row.append(rect)
tiles.append(row)
game_over = ttt.terminal(board)
player = ttt.player(board)
# Show title
if game_over:
winner = ttt.winner(board)
if winner is None:
title = f'Game Over: Tie.'
else:
title = f'Game Over: {winner} wins.'
elif user == player:
title = f'Play as {user}'
else:
title = f'AI thinking...'
title = largeFont.render(title, True, white)
titleRect = title.get_rect()
titleRect.center = (round(width/2), 30)
screen.blit(title, titleRect)
# Check for AI move
if user != player and not game_over:
if ai_turn:
time.sleep(0.5)
move = ttt.minimax(board)
board = ttt.result(board, move)
ai_turn = False
else:
ai_turn = True
# Check for a user move
click, _, _ = pygame.mouse.get_pressed()
if click == 1 and user == player and not game_over:
mouse = pygame.mouse.get_pos()
for i in range(3):
for j in range(3):
if (board[i][j] == ttt.EMPTY and tiles[i][j].collidepoint(mouse)):
board = ttt.result(board, (i, j))
if game_over:
againButton = pygame.Rect(round(width/3), round(height-65), round(width/3), 50)
again = mediumFont.render('Play Again', True, black)
againRect = again.get_rect()
againRect.center = againButton.center
pygame.draw.rect(screen, white, againButton)
screen.blit(again, againRect)
click, _, _ = pygame.mouse.get_pressed()
if click == 1:
mouse = pygame.mouse.get_pos()
if againButton.collidepoint(mouse):
time.sleep(0.2)
user = None
board = ttt.initial_state()
ai_turn = False
pygame.display.flip()
These are the sidenotes for the answers given by the organization that gave these questions:
No changing the no. of parameters or the parameters themselves in any functions.
Follow the docstrings written in all functions
New functions may be defined as you wish
Please let me know if there are any bugs/errors which are causing the AI to be dumb when playing as 'O'. I believe the bug is in utility, but I can't change the code because it is not allowed (written in the docstrings).
Thank you!
Edit: The problem has been ALMOST solved, but the AI becomes dumb sometimes, like not trying to block the user's move with the opposite symbol, etc.

best_score = -float('inf') # Least possible score
you need to vary this according to the player for which you calculate the move. I think because of this the negative player is choosing random/first plausible move.
I have implemented minimax and related heuristics like 2 times, and always found that using the "negamax" approach worked best, since you don't need to worry about when to apply max and when min based on the player.

Related

How to turn set items into indexes for an array

I am currently trying to solve an exercise in CS50 AI, where I am supposed to create a tictactoe using a minimax algorithm. While doing this, I have to also create a function that generates possible options as well a function that generates a new state of the board as soon as one of the possible actions is chosen. However, I am having an error in my code.
This is the error:
File "c:\Users\Melisa\OneDrive\Desktop\tictactoe\tictactoe.py", line 40, in result
kopja[i][j] = player(board)
TypeError: list indices must be integers or slices, not tuple`
This is my code for the whole problem:
import copy
import math
X = "X"
O = "O"
EMPTY = None
def initial_state():
return [[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY]]
def player(board):
Xc= 0
Oc= 0
for i in board:
for j in i:
if j == X:
Xc+=1
if j == O:
Oc+=1
# since we start with x
if Xc>Oc:
return O
else:
return X
def actions(board):
possible= set()
for i in range(0,len(board)):
for j in range(len(board[0])):
if board[i][j]== EMPTY:
possible.add((i,j))
return possible
def result(board, action):
kopja = copy.deepcopy(board)
i,j = action[0],action[1]
kopja[i][j] = player(board)
return kopja
def winner(board):
# present all of the winning outlays in a manual way:
# horisontally; only i changes ,j is contantly 0,1,2
for e in range(3):
if board[e][0]==board[e][1]==board[e][2] and board[e][0] != EMPTY:
winneri = board[0][e]
# diagonals are purely fixed
if (board[0][0]==board[1][1]==board[2][2] or board[0][2]==board[1][1]==board[2][0])and board[1][1]!=EMPTY:
winneri= board[1][1]
else:
winneri= None
return winneri
def terminal(board):
if winner(board) == X or winner(board) == O:
return True
for i in range(3):
for j in range(3):
if board[i][j] == EMPTY:
return False
return True
def utility(board):
if winner(board)== X:
return 1
if winner(board)== O:
return -1
else:
return 0
def maxval(board):
if terminal(board):
return utility(board)
else:
v = float('-inf')
for action in actions(board):
v = max(v,minval(result(board,action)))
return v
def minval(board):
if terminal(board):
return utility(board)
else:
v = float('inf')
for action in actions(board):
v = min(v,maxval(result(board,action)))
return v
def minimax(board):
if terminal(board):
return None
else:
listx=[]
if player(board) == X:
for action in actions(board):
listx.append((minval(result(board,action)),action))
listx.reverse()
listi=listx
return listi[0]
if player(board) == O:
listo=[]
for action in actions(board):
listo.append((maxval(result(board,action)),action))
listo.reverse()
lista=listo
return lista[0]
Here is the code that CS50 uses to run the program:
import pygame
import sys
import time
import tictactoe as ttt
pygame.init()
size = width, height = 600, 400
black = (0, 0, 0)
white = (255, 255, 255)
screen = pygame.display.set_mode(size)
mediumFont = pygame.font.Font("OpenSans-Regular.ttf", 28)
largeFont = pygame.font.Font("OpenSans-Regular.ttf", 40)
moveFont = pygame.font.Font("OpenSans-Regular.ttf", 60)
user = None
board = ttt.initial_state()
ai_turn = False
while True:
for event in pygame.event.get():
if event.type == pygame.QUIT:
sys.exit()
screen.fill(black)
# Let user choose a player.
if user is None:
# Draw title
title = largeFont.render("Play Tic-Tac-Toe", True, white)
titleRect = title.get_rect()
titleRect.center = ((width / 2), 50)
screen.blit(title, titleRect)
# Draw buttons
playXButton = pygame.Rect((width / 8), (height / 2), width / 4, 50)
playX = mediumFont.render("Play as X", True, black)
playXRect = playX.get_rect()
playXRect.center = playXButton.center
pygame.draw.rect(screen, white, playXButton)
screen.blit(playX, playXRect)
playOButton = pygame.Rect(5 * (width / 8), (height / 2), width / 4, 50)
playO = mediumFont.render("Play as O", True, black)
playORect = playO.get_rect()
playORect.center = playOButton.center
pygame.draw.rect(screen, white, playOButton)
screen.blit(playO, playORect)
# Check if button is clicked
click, _, _ = pygame.mouse.get_pressed()
if click == 1:
mouse = pygame.mouse.get_pos()
if playXButton.collidepoint(mouse):
time.sleep(0.2)
user = ttt.X
elif playOButton.collidepoint(mouse):
time.sleep(0.2)
user = ttt.O
else:
# Draw game board
tile_size = 80
tile_origin = (width / 2 - (1.5 * tile_size),
height / 2 - (1.5 * tile_size))
tiles = []
for i in range(3):
row = []
for j in range(3):
rect = pygame.Rect(
tile_origin[0] + j * tile_size,
tile_origin[1] + i * tile_size,
tile_size, tile_size
)
pygame.draw.rect(screen, white, rect, 3)
if board[i][j] != ttt.EMPTY:
move = moveFont.render(board[i][j], True, white)
moveRect = move.get_rect()
moveRect.center = rect.center
screen.blit(move, moveRect)
row.append(rect)
tiles.append(row)
game_over = ttt.terminal(board)
player = ttt.player(board)
# Show title
if game_over:
winner = ttt.winner(board)
if winner is None:
title = f"Game Over: Tie."
else:
title = f"Game Over: {winner} wins."
elif user == player:
title = f"Play as {user}"
else:
title = f"Computer thinking..."
title = largeFont.render(title, True, white)
titleRect = title.get_rect()
titleRect.center = ((width / 2), 30)
screen.blit(title, titleRect)
# Check for AI move
if user != player and not game_over:
if ai_turn:
time.sleep(0.5)
move = ttt.minimax(board)
board = ttt.result(board, move)
ai_turn = False
else:
ai_turn = True
# Check for a user move
click, _, _ = pygame.mouse.get_pressed()
if click == 1 and user == player and not game_over:
mouse = pygame.mouse.get_pos()
for i in range(3):
for j in range(3):
if (board[i][j] == ttt.EMPTY and tiles[i][j].collidepoint(mouse)):
board = ttt.result(board, (i, j))
if game_over:
againButton = pygame.Rect(width / 3, height - 65, width / 3, 50)
again = mediumFont.render("Play Again", True, black)
againRect = again.get_rect()
againRect.center = againButton.center
pygame.draw.rect(screen, white, againButton)
screen.blit(again, againRect)
click, _, _ = pygame.mouse.get_pressed()
if click == 1:
mouse = pygame.mouse.get_pos()
if againButton.collidepoint(mouse):
time.sleep(0.2)
user = None
board = ttt.initial_state()
ai_turn = False
pygame.display.flip()
I tried to use the tuples inside the set as indexes for the array(board) by assigning them :
i,j = action[0],action[1]
and expected this solution to work, but instead it generated an error.

After some digging thru your code, I think I figured it out. Did you check the value of action before the call to result()? If so, I think you will find it is not a what you think it is. (In other words, it does not look like (1,1)).
The cause is complicated...it begins near the end of minimax() then propagates thru your code. The code segment of interest is repeated below for easy reference:
if player(board) == X:
for action in actions(board):
listx.append((minval(result(board,action)),action))
listx.reverse()
listi=listx
return listi[0]
If I understand your code, inside this for loop you are creating listx as a list of tuples with (value, action) pairs. Then, when you exit the loop, you return listi[0] which is the 1st (value, action) tuple in the list. You want to return the action from the first tuple, which is listi[0][1].
Once you get that fixed, review the logic in the loop. You are creating a list and reversing it each time thru the loop. Its not clear to me why are you reversing it. Do you want to sort the tuples based on the value? If so, you should do that after you exit the loop, AND use the value in the tuple as the sort key. Also, you really don't need 2 lists.

Optimizing Negamax Function with 5x5 Hexapawn

I need to improve the speed of this program, because at the moment it is pretty slow. I know that representing game states in binary can be very effective, however, I don't know how to do that. I have also tried using numba, however that seems to make it slower. I have attached the code below. Thank you to anyone who can help!
import pygame, sys, time, hashlib
from copy import deepcopy
pygame.init()
red = pygame.Color(255,0,0)
white = pygame.Color(255,255,255)
black = pygame.Color(0,0,0)
pygame.display.set_caption('Hexapawn AI')
width, height = 700,700
game_window = pygame.display.set_mode((width, height))
def set_pawns():
global game_window, board
for y in range(5):
for x in range(5):
if board[y][x] == 1:
game_window.blit( blue_pawn, ( (width/5)*x, (height/5)*(4-y) ))
if board[y][x] == -1:
game_window.blit( red_pawn, ( (width/5)*x , (height/5)*(4-y) ))
def build_lines():
global game_window
for x in range(1,5):
pygame.draw.line(game_window, black, (width/5 * x, 0), (width/5 * x, height), 7)
pygame.draw.line(game_window, black, (0, height/5 * x), (width, height/5 * x), 7)
def get_possible_moves(board, player):
possible_moves = []
forward = 1 if player == 1 else -1
opponent = -1 if player == 1 else 1
for y in range(5):
for x in range(5):
if board[y][x] != player:
continue
if x-1 >= 0 and y+forward < 5 and board[y+forward][x-1] == opponent:
possible_moves.append([x,y,x-1,y+forward])
if x+1 < 5 and y+forward < 5 and board[y+forward][x+1] == opponent:
possible_moves.append([x,y,x+1,y+forward])
if (y+1 < 5 and player == 1) or (y+1 > -1 and player == -1):
if board[y+forward][x] == " ":
possible_moves.append([x,y,x,y+forward])
return possible_moves
def make_move(board,move,player):
global game_window, width, height
game_window.fill(white)
build_lines()
board[move[1]][move[0]] = " "
board[move[3]][move[2]] = player
set_pawns()
def neg_make_move(board, move, player):
x1, y1, x2, y2 = move
board = deepcopy(board)
board[y1][x1] = " "
board[y2][x2] = player
return board
def check_for_win(board,player):
if player == -1:
if -1 in board[0]:
return True
if get_possible_moves(board,1) == []:
return True
elif player == 1:
if 1 in board[4]:
return True
if get_possible_moves(board,-1) == []:
return True
return False
TRANSPOSITION_TABLE = {}
def state_hash(board):
serialized = str(board).encode()
return hashlib.sha256(serialized).hexdigest()
def store(table, board, alpha, beta, best, depth):
state = state_hash(board)
if best[1] <= alpha:
flag = 'UPPERCASE'
elif best[1] >= beta:
flag = 'LOWERCASE'
else:
flag = 'EXACT'
table[state] = [best, flag, depth]
def negamax(board, depth, turn, alpha, beta):
alpha_org = alpha
state = state_hash(board)
if state in TRANSPOSITION_TABLE:
tt_entry = TRANSPOSITION_TABLE[state]
if tt_entry[2] >= depth:
if tt_entry[1] == 'EXACT':
return tt_entry[0]
elif tt_entry[1] == 'LOWERCASE':
alpha = max(alpha, tt_entry[0][1])
elif tt_entry[1] == 'UPPERCASE':
beta = min(beta, tt_entry[0][1])
if alpha >= beta:
return tt_entry[0]
if check_for_win(board, -turn):
return None, -(25+depth)
if depth == 0:
return get_possible_moves(board,turn)[0], (depth)
best_score = -200
for move in get_possible_moves(board,turn):
new_board = neg_make_move(board, move, turn)
score = -negamax(new_board, depth - 1, -turn, -beta, -alpha)[1]
alpha = max(alpha,score)
if score > best_score:
best_score, best_move = score, move
if alpha >= beta:
break
store(TRANSPOSITION_TABLE, board, alpha_org, beta, [best_move,best_score], depth)
return best_move, best_score
# Build board
board = [[1 for x in range(5)]]
for x in range(3):
board.append([" " for x in range(5)])
board.append([-1 for x in range(5)])
game_window.fill(white)
# Draw game board lines
build_lines()
# Load sprites with correct sizes
tile_size = (width/5,height/5)
blue_pawn = pygame.transform.scale(pygame.image.load("blue_pawn.png"), tile_size)
red_pawn = pygame.transform.scale(pygame.image.load("red_pawn.png"), tile_size)
# Draw the pawns to the board
set_pawns()
pygame.display.update()
while True:
for event in pygame.event.get():
# if user clicks the X or they type esc then the screen will close
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
if event.type == pygame.KEYDOWN:
if event.key == pygame.K_ESCAPE:
pygame.quit()
sys.exit()
start = time.time()
move = negamax(board,12,1,-10000,10000)[0]
print(f"Blue move took {time.time()-start} seconds to calculate.")
make_move(board,move,1)
pygame.display.update()
if check_for_win(board,1):
print("Blue Wins!")
pygame.quit()
sys.exit()
time.sleep(1)
start = time.time()
move = negamax(board,12,-1,-10000,10000)[0]
print(f"Red move took {time.time()-start} seconds to calculate.")
make_move(board,move,-1)
pygame.display.update()
if check_for_win(board,-1):
print("Red Wins!")
pygame.quit()
sys.exit()
pygame.display.update()
time.sleep(1)

Attribute Error: List object has no attribute CheckClick

In the Swap function, I am checking under some index a button class instance, which I checked using print statements, but for some reason it still gives me an error saying that it has no such attribute check click. Any tips on formatting are also welcome, I am just a beginner. I am using three different arrays to hold various instances, values and coordinates for each corresponding array position. I am trying to make a sort of match 3 game. Thanks for any help
from os import access
import pygame,sys
from random import randrange
import numpy
#Constants
Columns =5
Rows = 5
X,Y = 320,0
class Button:
def __init__(self,x,y,image,scale):
self.x = x
self.y=y
self.image = pygame.transform.scale(image,(scale,scale))
self.scale = scale
self.rect = self.image.get_rect(topleft=(x,y))
self.clicked = False
self.Action = False
def Draw(self):
Win.blit(self.image,(self.x,self.y))
def CheckClick(self):
isClicked = False
mousepos= pygame.mouse.get_pos()
if self.rect.collidepoint(mousepos):
if pygame.mouse.get_pressed()[0] ==1 and self.clicked == False:
self.clicked = True
self.Action = True
if pygame.mouse.get_pressed()[0] ==0:
self.clicked = False
return self.Action
#Win
WinWidth, WinHeight = 1280,800
Win = pygame.display.set_mode((WinWidth,WinHeight))
#IMAGES
test_img = pygame.image.load("test.png")
red_img = pygame.image.load("Red.png")
green_img = pygame.image.load("green.png")
blue_img = pygame.image.load("blue.png")
#Board
Board = [[ randrange(0,3) for column in range(Columns)] for row in range(Rows) ]
BoardObjs = []
BoardXYs = [[None for column in range(Columns)]for row in range(Rows)]
#Fill BoardXYS
for k in range(len(Board)):
for j in range(len(Board[1])):
BoardXYs[k][j] = [X,Y]
X += 160
X = 320
Y += 160
for r in range(len(Board)):
for t in range(len(Board[1])):
if Board[r][t] == 0:
BoardObjs.append(Button(BoardXYs[r][t][0],BoardXYs[r][t][1],red_img,100))
if Board[r][t] == 1:
BoardObjs.append(Button(BoardXYs[r][t][0],BoardXYs[r][t][1],green_img,100))
if Board[r][t] == 2:
BoardObjs.append(Button(BoardXYs[r][t][0],BoardXYs[r][t][1],blue_img,100))
BoardObjs = [[BoardObjs[0],BoardObjs[1],BoardObjs[2],BoardObjs[3],BoardObjs[4]],
[BoardObjs[5],BoardObjs[6],BoardObjs[7],BoardObjs[8],BoardObjs[9]],
[BoardObjs[10],BoardObjs[11],BoardObjs[12],BoardObjs[13],BoardObjs[14]],
[BoardObjs[15],BoardObjs[16],BoardObjs[17],BoardObjs[18],BoardObjs[19]],
[BoardObjs[20],BoardObjs[21],BoardObjs[22],BoardObjs[23],BoardObjs[24]]]
print(len(Board))
print(len(BoardObjs))
#Images
img_0 = pygame.Rect(0,0,64,64)
TouchingReds = 0
def CheckMatches(Board):
VerticalMatch = False
HorizontalMatch = False
for m in range(len(Board)):
for n in range(len(Board[1])-2):
if Board[m][n]==Board[m][n+1]==Board[m][n+2]:
HorizontalMatch = True
Board[m][n] = None
Board[m][n+1] = None
Board[m][n+2] = None
BoardObjs[m][n] = None
BoardObjs[m][n+1] = None
BoardObjs[m][n+2] = None
for o in range(len(Board[1])-2):
for u in range(len(Board)):
if Board[o][u]==Board[o+1][u]==Board[o+2][u]:
VerticalMatch = True
Board[o][u] = None
Board[o+1][u] = None
Board[o+2][u] = None
BoardObjs[o][u] = None
BoardObjs[o+1][u] = None
BoardObjs[o+2][u] = None
def Draw(Board,BoardXYs):
DoAppend = True
Win.fill((255,255,255))
for y in range(len(BoardObjs)):
for u in range(len(BoardObjs[1])):
if BoardObjs[y][u] != None:
BoardObjs[y][u].Draw()
pygame.display.update()
def Swap():
FirstClick = False
for i in range(len(BoardObjs)):
for t in range(len(BoardObjs[1])):
if BoardObjs[i][t] != None:
if BoardObjs[i][t].CheckClick and FirstClick == True:
BoardObjs[num][num2]=BoardObjs[i][t]
BoardObjs[i][t]=Buffer
FirstClick = False
print(BoardObjs[i][t])
if BoardObjs[i][t] !=None:
if BoardObjs[i][t].CheckClick:
Buffer = BoardObjs[i]
num =i
num2 = t
FirstClick = True
#ZEROS - RED
#ONES - GREEN
#TWOS - BLUE
clock = pygame.time.Clock()
def GameLoop():
run = True
while run:
clock.tick(60)
for event in pygame.event.get():
if event.type == pygame.QUIT:
run = False
CheckMatches(Board)
Draw(Board,BoardXYs)
Swap()
pygame.quit()
sys.exit()
if __name__ == "__main__":
GameLoop()

The mistake is in the Swap() function. Buffer is assigned to an item in the grid (BoardObjs[i][t]=Buffer). So Buffer needs to be a Button object instead of a row (list of objects):
Buffer = BoardObjs[i]
Buffer = BoardObjs[i][t]
The initialization of the board can be simplified:
#Board
Board = [[randrange(0,3) for column in range(Columns)] for row in range(Rows) ]
BoardXYs = [[(j*160+320, k*160) for j in range(Columns)]for k in range(Rows)]
BoardObjs = []
for r in range(len(Board)):
BoardObjs.append([])
for t in range(len(Board[1])):
image = [red_img, green_img, blue_img][board[r][t]]
BoardObjs[-1].append(Button(*BoardXYs[r][t], image, 100))

The CheckClick function you added is a function, not an attribute. Make sure to put parentheses after the name, even if it takes no argument.
Try, BoardObjs[i][t].CheckClick()

Game AI works powerfully on one side and becomes dumb on the other in Tic-Tac-Toe

I am trying to make a Tic-Tac-Toe game in Python using PyGame and the MiniMax algorithm. The AI plays really well when given the first chance (playing as 'X'), but becomes dumb enough to help the user win when not given the first chance (playing as 'O'). I think I know what the problem is but changing it is messing with the whole program and is not going by the given docstrings.
I've made two python files - one for the GUI (runner.py) and the other for the logic behind the game and the AI (tictactoe.py).
This is the logic behind the game:
# Import module `copy` for function `deepcopy` to deeply copy an
# original (mutable) object to save the object from mutations
import copy
X = 'X'
O = 'O'
EMPTY = None
def initial_state():
"""Returns starting state of the board
"""
return [
[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY]
]
def display(board, autoprint=False):
"""Displays the board nested list as
a 3x3 matrix for board visualization
"""
vis_board = ''
for row in board:
for playr in row:
if playr is None:
playr = ' '
playr += ' '
vis_board += playr
vis_board += '\n'
if autoprint:
print(vis_board)
return vis_board
def player(board):
"""Returns player who has the next turn on a board
"""
global X, O
# Initial values for every call of the function
X_count = 0
O_count = 0
for row in board:
for playr in row:
if playr == X:
X_count += 1
elif playr == O:
O_count += 1
# `X` always starts first
if O_count < X_count:
return O
return X
def actions(board):
"""Returns set of all possible actions
(i, j) available on the board
"""
global EMPTY
action_set = set()
for i, row in enumerate(board):
for j, playr in enumerate(row):
if playr is EMPTY:
action_set.add((i, j))
return action_set
def result(board, action):
"""Returns the board that results from
making move (i, j) on the board.
"""
global EMPTY
if type(action) is not tuple or len(action) != 2:
raise Exception('invalid action taken')
# Using `deepcopy` to make a deepcopy of *board*
# as duplication by slicing entire list and by
# type conversion is not working poperly
dup_board = copy.deepcopy(board)
# Unpack the coordinates as `I` and `J`
I, J = action
# Check if place has not already been used
if dup_board[I][J] is EMPTY:
dup_board[I][J] = player(dup_board)
else:
raise Exception('invalid action taken')
return dup_board
def is_full(board):
"""Returns True if all places have been occupied, else returns False
"""
global EMPTY
for row in board:
for playr in row:
if playr is EMPTY:
return False
return True
def winner(board):
"""Returns the winner of the game, if there is one.
"""
winr = None # Initial declaration to avoid errors if no winner found
# Check diagonally
if (board[1][1] == board[0][0] and board[0][0] == board[2][2])\
or (board[1][1] == board[0][2] and board[0][2] == board[2][0]):
winr = board[1][1]
return winr
for i in range(3):
# Check each row for three-in-a-row
if board[i][0] == board[i][1] and board[i][1] == board[i][2]:
winr = board[i][1]
break
# Check each column for three-in-a-column
elif board[0][i] == board[1][i] and board[1][i] == board[2][i]:
winr = board[1][i]
break
return winr
def terminal(board):
"""Returns True if game is over, False otherwise.
"""
if winner(board) is None and not is_full(board):
return False
return True
def utility(board):
"""Returns 1 if X has won the game, -1 if O has won, 0 otherwise.
"""
global X, O
if terminal(board):
winr = winner(board)
if winr == X:
util = 1
elif winr == O:
util = -1
else:
util = 0
return util
return None
def get_best_score(board, is_max_turn):
"""Returns the best value of values of all possible moves
"""
if utility(board) is not None:
return utility(board)
scores = []
# Recursively help `minimax` choose the best action
# in `actions` of *board* by returning the best value
for action in actions(board):
rslt = result(board, action)
scores.append(get_best_score(rslt, not is_max_turn))
return max(scores) if is_max_turn else min(scores)
def minimax(board):
"""Returns the optimal action for the current player on the board.
"""
if terminal(board):
return None
best_score = -float('inf') # Least possible score
best_action = None
for action in actions(board):
rslt = result(board, action)
score = get_best_score(rslt, False)
if score > best_score:
best_score = score
best_action = action
return best_action
The GUI code file:
# Import module `PyGame` for a GUI
import pygame
import sys
import time
# Import module `tictactoe` (from the same folder as
# this file `__file__`) for the logic of the game's AI
import tictactoe as ttt
pygame.init()
size = width, height = 600, 400
# Colors
black = (0, 0, 0)
white = (255, 255, 255)
screen = pygame.display.set_mode(size)
mediumFont = pygame.font.Font('OpenSans-Regular.ttf', 24)
largeFont = pygame.font.Font('OpenSans-Regular.ttf', 40)
moveFont = pygame.font.Font('OpenSans-Regular.ttf', 60)
user = None
board = ttt.initial_state()
ai_turn = False
while True:
for event in pygame.event.get():
if event.type == pygame.QUIT:
sys.exit()
screen.fill(black)
# Let user choose a player.
if user is None:
# Draw title
title = largeFont.render('Play Tic-Tac-Toe', True, white)
titleRect = title.get_rect()
titleRect.center = (round(width/2), 50)
screen.blit(title, titleRect)
# Draw buttons
playXButton = pygame.Rect(round(width/8), round(height/2), round(width/4), 50)
playX = mediumFont.render('Play as X', True, black)
playXRect = playX.get_rect()
playXRect.center = playXButton.center
pygame.draw.rect(screen, white, playXButton)
screen.blit(playX, playXRect)
playOButton = pygame.Rect(5*round(width/8), round(height/2), round(width/4), 50)
playO = mediumFont.render('Play as O', True, black)
playORect = playO.get_rect()
playORect.center = playOButton.center
pygame.draw.rect(screen, white, playOButton)
screen.blit(playO, playORect)
# Check if button is clicked
click, _, _ = pygame.mouse.get_pressed()
if click == 1:
mouse = pygame.mouse.get_pos()
time.sleep(0.5)
if playXButton.collidepoint(mouse):
user = ttt.X
elif playOButton.collidepoint(mouse):
user = ttt.O
else:
# Draw game board
tile_size = 80
tile_origin = (width / 2 - (1.5 * tile_size),
height / 2 - (1.5 * tile_size))
tiles = []
for i in range(3):
row = []
for j in range(3):
rect = pygame.Rect(
round(tile_origin[0]+j*tile_size),
round(tile_origin[1]+i*tile_size),
round(tile_size), round(tile_size)
)
pygame.draw.rect(screen, white, rect, 3)
if board[i][j] != ttt.EMPTY:
move = moveFont.render(board[i][j], True, white)
moveRect = move.get_rect()
moveRect.center = rect.center
screen.blit(move, moveRect)
row.append(rect)
tiles.append(row)
game_over = ttt.terminal(board)
player = ttt.player(board)
# Show title
if game_over:
winner = ttt.winner(board)
if winner is None:
title = f'Game Over: Tie.'
else:
title = f'Game Over: {winner} wins.'
elif user == player:
title = f'Play as {user}'
else:
title = f'AI thinking...'
title = largeFont.render(title, True, white)
titleRect = title.get_rect()
titleRect.center = (round(width/2), 30)
screen.blit(title, titleRect)
# Check for AI move
if user != player and not game_over:
if ai_turn:
time.sleep(0.5)
move = ttt.minimax(board)
board = ttt.result(board, move)
ai_turn = False
else:
ai_turn = True
# Check for a user move
click, _, _ = pygame.mouse.get_pressed()
if click == 1 and user == player and not game_over:
mouse = pygame.mouse.get_pos()
for i in range(3):
for j in range(3):
if (board[i][j] == ttt.EMPTY and tiles[i][j].collidepoint(mouse)):
board = ttt.result(board, (i, j))
if game_over:
againButton = pygame.Rect(round(width/3), round(height-65), round(width/3), 50)
again = mediumFont.render('Play Again', True, black)
againRect = again.get_rect()
againRect.center = againButton.center
pygame.draw.rect(screen, white, againButton)
screen.blit(again, againRect)
click, _, _ = pygame.mouse.get_pressed()
if click == 1:
mouse = pygame.mouse.get_pos()
if againButton.collidepoint(mouse):
time.sleep(0.2)
user = None
board = ttt.initial_state()
ai_turn = False
pygame.display.flip()
These are the sidenotes for the answers given by the organization that gave these questions:
No changing the no. of parameters or the parameters themselves in any functions.
Follow the docstrings written in all functions
New functions may be defined as you wish
Please let me know if there are any bugs/errors which are causing the AI to be dumb when playing as 'O'. I believe the bug is in utility, but I can't change the code because it is not allowed (written in the docstrings).
Thank you!
Edit: The problem has been ALMOST solved, but the AI becomes dumb sometimes, like not trying to block the user's move with the opposite symbol, etc.

best_score = -float('inf') # Least possible score
you need to vary this according to the player for which you calculate the move. I think because of this the negative player is choosing random/first plausible move.
I have implemented minimax and related heuristics like 2 times, and always found that using the "negamax" approach worked best, since you don't need to worry about when to apply max and when min based on the player.

Reusing a Tkinter window for a game of Tic Tac Toe

I've written a program (listed below) which plays Tic Tic Toe with a Tkinter GUI. If I invoke it like this:
root = tk.Tk()
root.title("Tic Tac Toe")
player1 = QPlayer(mark="X")
player2 = QPlayer(mark="O")
human_player = HumanPlayer(mark="X")
player2.epsilon = 0 # For playing the actual match, disable exploratory moves
game = Game(root, player1=human_player, player2=player2)
game.play()
root.mainloop()
it works as expected and the HumanPlayer can play against player2, which is a computer player (specifically, a QPlayer). The figure below shows how the HumanPlayer (with mark "X") easily wins.
In order to improve the performance of the QPlayer, I'd like to 'train' it by allowing it to play against an instance of itself before playing against the human player. I've tried modifying the above code as follows:
root = tk.Tk()
root.title("Tic Tac Toe")
player1 = QPlayer(mark="X")
player2 = QPlayer(mark="O")
for _ in range(1): # Play a couple of training games
training_game = Game(root, player1, player2)
training_game.play()
training_game.reset()
human_player = HumanPlayer(mark="X")
player2.epsilon = 0 # For playing the actual match, disable exploratory moves
game = Game(root, player1=human_player, player2=player2)
game.play()
root.mainloop()
What I then find, however, is that the Tkinter window contains two Tic Tac Toe boards (depicted below), and the buttons of the second board are unresponsive.
In the above code, the reset() method is the same one as used in the callback of the "Reset" button, which usually makes the board blank again to start over. I don't understand why I'm seeing two boards (of which one is unresponsive) instead of a single, responsive board?
For reference, the full code of the Tic Tac Toe program is listed below (with the 'offensive' lines of code commented out):
import numpy as np
import Tkinter as tk
import copy
class Game:
def __init__(self, master, player1, player2, Q_learn=None, Q={}, alpha=0.3, gamma=0.9):
frame = tk.Frame()
frame.grid()
self.master = master
self.player1 = player1
self.player2 = player2
self.current_player = player1
self.other_player = player2
self.empty_text = ""
self.board = Board()
self.buttons = [[None for _ in range(3)] for _ in range(3)]
for i in range(3):
for j in range(3):
self.buttons[i][j] = tk.Button(frame, height=3, width=3, text=self.empty_text, command=lambda i=i, j=j: self.callback(self.buttons[i][j]))
self.buttons[i][j].grid(row=i, column=j)
self.reset_button = tk.Button(text="Reset", command=self.reset)
self.reset_button.grid(row=3)
self.Q_learn = Q_learn
self.Q_learn_or_not()
if self.Q_learn:
self.Q = Q
self.alpha = alpha # Learning rate
self.gamma = gamma # Discount rate
self.share_Q_with_players()
def Q_learn_or_not(self): # If either player is a QPlayer, turn on Q-learning
if self.Q_learn is None:
if isinstance(self.player1, QPlayer) or isinstance(self.player2, QPlayer):
self.Q_learn = True
def share_Q_with_players(self): # The action value table Q is shared with the QPlayers to help them make their move decisions
if isinstance(self.player1, QPlayer):
self.player1.Q = self.Q
if isinstance(self.player2, QPlayer):
self.player2.Q = self.Q
def callback(self, button):
if self.board.over():
pass # Do nothing if the game is already over
else:
if isinstance(self.current_player, HumanPlayer) and isinstance(self.other_player, HumanPlayer):
if self.empty(button):
move = self.get_move(button)
self.handle_move(move)
elif isinstance(self.current_player, HumanPlayer) and isinstance(self.other_player, ComputerPlayer):
computer_player = self.other_player
if self.empty(button):
human_move = self.get_move(button)
self.handle_move(human_move)
if not self.board.over(): # Trigger the computer's next move
computer_move = computer_player.get_move(self.board)
self.handle_move(computer_move)
def empty(self, button):
return button["text"] == self.empty_text
def get_move(self, button):
info = button.grid_info()
move = (info["row"], info["column"]) # Get move coordinates from the button's metadata
return move
def handle_move(self, move):
try:
if self.Q_learn:
self.learn_Q(move)
i, j = move # Get row and column number of the corresponding button
self.buttons[i][j].configure(text=self.current_player.mark) # Change the label on the button to the current player's mark
self.board.place_mark(move, self.current_player.mark) # Update the board
if self.board.over():
self.declare_outcome()
else:
self.switch_players()
except:
print "There was an error handling the move."
pass # This might occur if no moves are available and the game is already over
def declare_outcome(self):
if self.board.winner() is None:
print "Cat's game."
else:
print "The game is over. The player with mark %s won!" % self.current_player.mark
def reset(self):
print "Resetting..."
for i in range(3):
for j in range(3):
self.buttons[i][j].configure(text=self.empty_text)
self.board = Board(grid=np.ones((3,3))*np.nan)
self.current_player = self.player1
self.other_player = self.player2
# np.random.seed(seed=0) # Set the random seed to zero to see the Q-learning 'in action' or for debugging purposes
self.play()
def switch_players(self):
if self.current_player == self.player1:
self.current_player = self.player2
self.other_player = self.player1
else:
self.current_player = self.player1
self.other_player = self.player2
def play(self):
if isinstance(self.player1, HumanPlayer) and isinstance(self.player2, HumanPlayer):
pass # For human vs. human, play relies on the callback from button presses
elif isinstance(self.player1, HumanPlayer) and isinstance(self.player2, ComputerPlayer):
pass
elif isinstance(self.player1, ComputerPlayer) and isinstance(self.player2, HumanPlayer):
first_computer_move = player1.get_move(self.board) # If player 1 is a computer, it needs to be triggered to make the first move.
self.handle_move(first_computer_move)
elif isinstance(self.player1, ComputerPlayer) and isinstance(self.player2, ComputerPlayer):
while not self.board.over(): # Make the two computer players play against each other without button presses
move = self.current_player.get_move(self.board)
self.handle_move(move)
def learn_Q(self, move): # If Q-learning is toggled on, "learn_Q" should be called after receiving a move from an instance of Player and before implementing the move (using Board's "place_mark" method)
state_key = QPlayer.make_and_maybe_add_key(self.board, self.current_player.mark, self.Q)
next_board = self.board.get_next_board(move, self.current_player.mark)
reward = next_board.give_reward()
next_state_key = QPlayer.make_and_maybe_add_key(next_board, self.other_player.mark, self.Q)
if next_board.over():
expected = reward
else:
next_Qs = self.Q[next_state_key] # The Q values represent the expected future reward for player X for each available move in the next state (after the move has been made)
if self.current_player.mark == "X":
expected = reward + (self.gamma * min(next_Qs.values())) # If the current player is X, the next player is O, and the move with the minimum Q value should be chosen according to our "sign convention"
elif self.current_player.mark == "O":
expected = reward + (self.gamma * max(next_Qs.values())) # If the current player is O, the next player is X, and the move with the maximum Q vlue should be chosen
change = self.alpha * (expected - self.Q[state_key][move])
self.Q[state_key][move] += change
class Board:
def __init__(self, grid=np.ones((3,3))*np.nan):
self.grid = grid
def winner(self):
rows = [self.grid[i,:] for i in range(3)]
cols = [self.grid[:,j] for j in range(3)]
diag = [np.array([self.grid[i,i] for i in range(3)])]
cross_diag = [np.array([self.grid[2-i,i] for i in range(3)])]
lanes = np.concatenate((rows, cols, diag, cross_diag)) # A "lane" is defined as a row, column, diagonal, or cross-diagonal
any_lane = lambda x: any([np.array_equal(lane, x) for lane in lanes]) # Returns true if any lane is equal to the input argument "x"
if any_lane(np.ones(3)):
return "X"
elif any_lane(np.zeros(3)):
return "O"
def over(self): # The game is over if there is a winner or if no squares remain empty (cat's game)
return (not np.any(np.isnan(self.grid))) or (self.winner() is not None)
def place_mark(self, move, mark): # Place a mark on the board
num = Board.mark2num(mark)
self.grid[tuple(move)] = num
#staticmethod
def mark2num(mark): # Convert's a player's mark to a number to be inserted in the Numpy array representing the board. The mark must be either "X" or "O".
d = {"X": 1, "O": 0}
return d[mark]
def available_moves(self):
return [(i,j) for i in range(3) for j in range(3) if np.isnan(self.grid[i][j])]
def get_next_board(self, move, mark):
next_board = copy.deepcopy(self)
next_board.place_mark(move, mark)
return next_board
def make_key(self, mark): # For Q-learning, returns a 10-character string representing the state of the board and the player whose turn it is
fill_value = 9
filled_grid = copy.deepcopy(self.grid)
np.place(filled_grid, np.isnan(filled_grid), fill_value)
return "".join(map(str, (map(int, filled_grid.flatten())))) + mark
def give_reward(self): # Assign a reward for the player with mark X in the current board position.
if self.over():
if self.winner() is not None:
if self.winner() == "X":
return 1.0 # Player X won -> positive reward
elif self.winner() == "O":
return -1.0 # Player O won -> negative reward
else:
return 0.5 # A smaller positive reward for cat's game
else:
return 0.0 # No reward if the game is not yet finished
class Player(object):
def __init__(self, mark):
self.mark = mark
self.get_opponent_mark()
def get_opponent_mark(self):
if self.mark == 'X':
self.opponent_mark = 'O'
elif self.mark == 'O':
self.opponent_mark = 'X'
else:
print "The player's mark must be either 'X' or 'O'."
class HumanPlayer(Player):
def __init__(self, mark):
super(HumanPlayer, self).__init__(mark=mark)
class ComputerPlayer(Player):
def __init__(self, mark):
super(ComputerPlayer, self).__init__(mark=mark)
class RandomPlayer(ComputerPlayer):
def __init__(self, mark):
super(RandomPlayer, self).__init__(mark=mark)
#staticmethod
def get_move(board):
moves = board.available_moves()
if moves: # If "moves" is not an empty list (as it would be if cat's game were reached)
return moves[np.random.choice(len(moves))] # Apply random selection to the index, as otherwise it will be seen as a 2D array
class THandPlayer(ComputerPlayer):
def __init__(self, mark):
super(THandPlayer, self).__init__(mark=mark)
def get_move(self, board):
moves = board.available_moves()
if moves:
for move in moves:
if THandPlayer.next_move_winner(board, move, self.mark):
return move
elif THandPlayer.next_move_winner(board, move, self.opponent_mark):
return move
else:
return RandomPlayer.get_move(board)
#staticmethod
def next_move_winner(board, move, mark):
return board.get_next_board(move, mark).winner() == mark
class QPlayer(ComputerPlayer):
def __init__(self, mark, Q={}, epsilon=0.2):
super(QPlayer, self).__init__(mark=mark)
self.Q = Q
self.epsilon = epsilon
def get_move(self, board):
if np.random.uniform() < self.epsilon: # With probability epsilon, choose a move at random ("epsilon-greedy" exploration)
return RandomPlayer.get_move(board)
else:
state_key = QPlayer.make_and_maybe_add_key(board, self.mark, self.Q)
Qs = self.Q[state_key]
if self.mark == "X":
return QPlayer.stochastic_argminmax(Qs, max)
elif self.mark == "O":
return QPlayer.stochastic_argminmax(Qs, min)
#staticmethod
def make_and_maybe_add_key(board, mark, Q): # Make a dictionary key for the current state (board + player turn) and if Q does not yet have it, add it to Q
state_key = board.make_key(mark)
if Q.get(state_key) is None:
moves = board.available_moves()
Q[state_key] = {move: 0.0 for move in moves} # The available moves in each state are initially given a default value of zero
return state_key
#staticmethod
def stochastic_argminmax(Qs, min_or_max): # Determines either the argmin or argmax of the array Qs such that if there are 'ties', one is chosen at random
min_or_maxQ = min_or_max(Qs.values())
if Qs.values().count(min_or_maxQ) > 1: # If there is more than one move corresponding to the maximum Q-value, choose one at random
best_options = [move for move in Qs.keys() if Qs[move] == min_or_maxQ]
move = best_options[np.random.choice(len(best_options))]
else:
move = min_or_max(Qs, key=Qs.get)
return move
root = tk.Tk()
root.title("Tic Tac Toe")
player1 = QPlayer(mark="X")
player2 = QPlayer(mark="O")
# for _ in range(1): # Play a couple of training games
# training_game = Game(root, player1, player2)
# training_game.play()
# training_game.reset()
human_player = HumanPlayer(mark="X")
player2.epsilon = 0 # For playing the actual match, disable exploratory moves
game = Game(root, player1=human_player, player2=player2)
game.play()
root.mainloop()

It looks like you only need to create the board one time as the reset method resets it for the new players. Each type you create a Game instance, you create a new Tk frame so you either need to destroy the old one or you can reuse the windows by not creating a new Game instance each time.
A minor change to the main code at the bottom of the file seems to fix this:
player1 = QPlayer(mark="X")
player2 = QPlayer(mark="O")
game = Game(root, player1, player2)
for _ in range(1): # Play a couple of training games
game.play()
game.reset()
human_player = HumanPlayer(mark="X")
player2.epsilon = 0 # For playing the actual match, disable exploratory moves
game.player1 = human_player
game.player2 = player2
game.play()

I've noticed in this code that if you were to use it in python 3.2.3 or similar editions all of the print statements would need to be enclosed by brackets, and you'd need to add tkinter in the program by importing it.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How do I implement a minimax function in a Tic-Tac-Toe game in Python? [duplicate] - python

Related

How to turn set items into indexes for an array

Optimizing Negamax Function with 5x5 Hexapawn

Attribute Error: List object has no attribute CheckClick

Game AI works powerfully on one side and becomes dumb on the other in Tic-Tac-Toe

Reusing a Tkinter window for a game of Tic Tac Toe

Categories

Resources