# noqa: D212, D415
"""
# Tic Tac Toe
```{figure} classic_tictactoe.gif
:width: 140px
:name: tictactoe
```
This environment is part of the <a href='..'>classic environments</a>. Please read that page first for general information.
| Import | `from pettingzoo.classic import tictactoe_v3` |
|--------------------|-----------------------------------------------|
| Actions | Discrete |
| Parallel API | Yes |
| Manual Control | No |
| Agents | `agents= ['player_1', 'player_2']` |
| Agents | 2 |
| Action Shape | (1) |
| Action Values | [0, 8] |
| Observation Shape | (3, 3, 2) |
| Observation Values | [0,1] |
Tic-tac-toe is a simple turn-based strategy game in which two players, X and O, take turns marking spaces on a 3 x 3 grid. The first player to place three of their marks in a horizontal, vertical, or diagonal line wins.
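A minimal interaction sketch using the standard AEC API, playing random legal moves (the seed is arbitrary):
```python
from pettingzoo.classic import tictactoe_v3

env = tictactoe_v3.env(render_mode="human")
env.reset(seed=42)
for agent in env.agent_iter():
    observation, reward, termination, truncation, info = env.last()
    if termination or truncation:
        action = None
    else:
        # sample a random legal move using the action mask
        action = env.action_space(agent).sample(observation["action_mask"])
    env.step(action)
env.close()
```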
### Observation Space
The observation is a dictionary which contains an `'observation'` element which is the usual RL observation described below, and an `'action_mask'` which holds the legal moves, described in the Legal Actions Mask section.
The main observation is two planes of the 3x3 board. For player_1, the first plane represents the placement of Xs and the second plane the placement of Os. Each cell holds 0 or 1: in the first
plane, 1 indicates that an X has been placed in that cell and 0 that it has not; likewise, in the second plane, 1 indicates that an O has been placed and 0 that it has not. For player_2 the planes
are swapped, so Os are encoded in the first plane and Xs in the second. Each agent therefore always sees its own marks in the first plane, which allows for self-play.
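As a short illustrative sketch (assuming an environment created with `tictactoe_v3.env()` that has already been reset), the two planes can be read like this:
```python
obs = env.observe("player_1")
own_marks = obs["observation"][:, :, 0]  # 1 wherever player_1 has a mark
opp_marks = obs["observation"][:, :, 1]  # 1 wherever player_2 has a mark
```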
#### Legal Actions Mask
The legal moves available to the current agent are found in the `action_mask` element of the dictionary observation. The `action_mask` is a binary vector in which each entry indicates whether the corresponding action is legal. The `action_mask` is all zeros for any agent other than the one
whose turn it is. Taking an illegal move ends the game with a reward of -1 for the illegally moving agent and a reward of 0 for all other agents.
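For example, the indices of the currently legal cells can be recovered from the mask (a sketch assuming `observation` and `agent` come from an `env.agent_iter()` / `env.last()` loop like the one above):
```python
import numpy as np

mask = observation["action_mask"]
legal_moves = np.flatnonzero(mask)  # all nine cells on an empty board
action = env.action_space(agent).sample(mask)  # sample uniformly among the legal moves
```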
### Action Space
Each action from 0 to 8 represents placing either an X or O in the corresponding cell. The cells are indexed as follows:
```
0 | 3 | 6
_________
1 | 4 | 7
_________
2 | 5 | 8
```
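Because the cells are numbered down each column, an action index can be converted to a (row, column) pair with integer division, as in this small sketch:
```python
action = 7
col, row = divmod(action, 3)  # action 7 -> column 2, row 1, i.e. the middle cell of the right column
```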
### Rewards
| Winner | Loser |
| :----: | :---: |
| +1 | -1 |
If the game ends in a draw, both players will receive a reward of 0.
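If you step through a full game with the loop shown earlier, the final per-agent rewards can be read from the environment afterwards (a small sketch; the printed outcome is hypothetical):
```python
print(env.rewards)  # e.g. {'player_1': 1, 'player_2': -1} if player_1 won the hypothetical game
```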
### Version History
* v3: Fixed bug in arbitrary calls to observe() (1.8.0)
* v2: Legal action mask in observation replaced illegal move list in infos (1.5.0)
* v1: Bumped version of all environments due to adoption of new agent iteration scheme where all agents are iterated over after they are done (1.4.0)
* v0: Initial versions release (1.0.0)
"""
from __future__ import annotations
import os
import gymnasium
import numpy as np
import pygame
from gymnasium import spaces
from gymnasium.utils import EzPickle
from pettingzoo import AECEnv
from pettingzoo.classic.tictactoe.board import TTT_GAME_NOT_OVER, TTT_TIE, Board
from pettingzoo.utils import AgentSelector, wrappers
def get_image(path):
    """Return a pygame image loaded from the given path."""
    cwd = os.path.dirname(__file__)
    image = pygame.image.load(os.path.join(cwd, path))
    return image
def get_font(path, size):
    """Return a pygame font loaded from the given path."""
    cwd = os.path.dirname(__file__)
    font = pygame.font.Font(os.path.join(cwd, path), size)
    return font
def env(**kwargs):
env = raw_env(**kwargs)
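    # TerminateIllegalWrapper ends the game with a -1 reward for an agent that
    # plays an illegal move, AssertOutOfBoundsWrapper raises on actions outside
    # the discrete action space, and OrderEnforcingWrapper errors if step() or
    # observe() are called before reset().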
env = wrappers.TerminateIllegalWrapper(env, illegal_reward=-1)
env = wrappers.AssertOutOfBoundsWrapper(env)
env = wrappers.OrderEnforcingWrapper(env)
return env
class raw_env(AECEnv, EzPickle):
metadata = {
"render_modes": ["human", "rgb_array"],
"name": "tictactoe_v3",
"is_parallelizable": False,
"render_fps": 1,
}
def __init__(
self, render_mode: str | None = None, screen_height: int | None = 1000
):
super().__init__()
EzPickle.__init__(self, render_mode, screen_height)
self.board = Board()
self.agents = ["player_1", "player_2"]
self.possible_agents = self.agents[:]
self.action_spaces = {i: spaces.Discrete(9) for i in self.agents}
self.observation_spaces = {
i: spaces.Dict(
{
"observation": spaces.Box(
low=0, high=1, shape=(3, 3, 2), dtype=np.int8
),
"action_mask": spaces.Box(low=0, high=1, shape=(9,), dtype=np.int8),
}
)
for i in self.agents
}
self.rewards = {i: 0 for i in self.agents}
self.terminations = {i: False for i in self.agents}
self.truncations = {i: False for i in self.agents}
self.infos = {i: {} for i in self.agents}
self._agent_selector = AgentSelector(self.agents)
self.agent_selection = self._agent_selector.reset()
self.render_mode = render_mode
self.screen_height = screen_height
self.screen = None
if self.render_mode == "human":
self.clock = pygame.time.Clock()
def observe(self, agent):
board_vals = np.array(self.board.squares).reshape(3, 3)
cur_player = self.possible_agents.index(agent)
opp_player = (cur_player + 1) % 2
observation = np.empty((3, 3, 2), dtype=np.int8)
        # Plane 0 is 1 wherever the current agent has a mark and 0 for every
        # other square (empty or opponent); plane 1 does the same for the opponent.
observation[:, :, 0] = np.equal(board_vals, cur_player + 1)
observation[:, :, 1] = np.equal(board_vals, opp_player + 1)
action_mask = self._get_mask(agent)
return {"observation": observation, "action_mask": action_mask}
def _get_mask(self, agent):
action_mask = np.zeros(9, dtype=np.int8)
# Per the documentation, the mask of any agent other than the
# currently selected one is all zeros.
if agent == self.agent_selection:
for i in self.board.legal_moves():
action_mask[i] = 1
return action_mask
def observation_space(self, agent):
return self.observation_spaces[agent]
def action_space(self, agent):
return self.action_spaces[agent]
    # The action is a value from 0 to 8 indicating the board position in which to place a mark.
def step(self, action):
if (
self.terminations[self.agent_selection]
or self.truncations[self.agent_selection]
):
return self._was_dead_step(action)
self.board.play_turn(self.agents.index(self.agent_selection), action)
status = self.board.game_status()
if status != TTT_GAME_NOT_OVER:
if status == TTT_TIE:
pass
else:
winner = status # either TTT_PLAYER1_WIN or TTT_PLAYER2_WIN
loser = winner ^ 1 # 0 -> 1; 1 -> 0
self.rewards[self.agents[winner]] += 1
self.rewards[self.agents[loser]] -= 1
            # once either player wins or there is a draw, the game is over and both players are done
self.terminations = {i: True for i in self.agents}
self._accumulate_rewards()
self.agent_selection = self._agent_selector.next()
if self.render_mode == "human":
self.render()
def reset(self, seed=None, options=None):
self.board.reset()
self.agents = self.possible_agents[:]
self.rewards = {i: 0 for i in self.agents}
self._cumulative_rewards = {i: 0 for i in self.agents}
self.terminations = {i: False for i in self.agents}
self.truncations = {i: False for i in self.agents}
self.infos = {i: {} for i in self.agents}
# selects the first agent
self._agent_selector.reinit(self.agents)
self.agent_selection = self._agent_selector.reset()
if self.render_mode is not None and self.screen is None:
pygame.init()
if self.render_mode == "human":
self.screen = pygame.display.set_mode(
(self.screen_height, self.screen_height)
)
pygame.display.set_caption("Tic-Tac-Toe")
elif self.render_mode == "rgb_array":
self.screen = pygame.Surface((self.screen_height, self.screen_height))
def render(self):
if self.render_mode is None:
gymnasium.logger.warn(
"You are calling render method without specifying any render mode."
)
return
screen_height = self.screen_height
screen_width = self.screen_height
# Setup dimensions for 'x' and 'o' marks
tile_size = int(screen_height / 4)
# Load and blit the board image for the game
board_img = get_image(os.path.join("img", "board.png"))
board_img = pygame.transform.scale(
board_img, (int(screen_width), int(screen_height))
)
self.screen.blit(board_img, (0, 0))
# Load and blit actions for the game
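        # Board squares hold 0 (empty), 1 (player_1) or 2 (player_2).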
        def get_symbol(value):
            if value == 0:
                return None
            elif value == 1:
                return "cross"
            else:
                return "circle"
        board_state = list(map(get_symbol, self.board.squares))
mark_pos = 0
for x in range(3):
for y in range(3):
mark = board_state[mark_pos]
mark_pos += 1
if mark is None:
continue
mark_img = get_image(os.path.join("img", mark + ".png"))
mark_img = pygame.transform.scale(mark_img, (tile_size, tile_size))
self.screen.blit(
mark_img,
(
(screen_width / 3.1) * x + (screen_width / 17),
(screen_width / 3.145) * y + (screen_height / 19),
),
)
if self.render_mode == "human":
pygame.display.update()
self.clock.tick(self.metadata["render_fps"])
observation = np.array(pygame.surfarray.pixels3d(self.screen))
return (
np.transpose(observation, axes=(1, 0, 2))
if self.render_mode == "rgb_array"
else None
)