# Source code for pettingzoo.utils.wrappers.terminate_illegal

from __future__ import annotations

from pettingzoo.utils.env import ActionType, AECEnv, AgentID, ObsType
from pettingzoo.utils.env_logger import EnvLogger
from pettingzoo.utils.wrappers.base import BaseWrapper



# [docs]
class TerminateIllegalWrapper(BaseWrapper[AgentID, ObsType, ActionType]):
    """Terminate the game, with the acting player losing, when an illegal move is made.

    Whether an action is legal is looked up in the acting agent's action mask.
    The mask is taken from ``observation["action_mask"]`` when the observation
    is a dictionary containing that key, and from ``info["action_mask"]``
    otherwise.

    Args:
        env: the AEC environment being wrapped.
        illegal_reward: reward assigned to the player making an illegal move.
    """

    def __init__(
        self, env: AECEnv[AgentID, ObsType, ActionType], illegal_reward: float
    ):
        super().__init__(env)
        self._illegal_value = illegal_reward
        # Observation and info cached by `observe` for the currently selected
        # agent; `step` consumes them and resets both to None.
        self._prev_obs: ObsType | None = None
        self._prev_info: dict | None = None
        self._terminated = False  # terminated by an illegal move

    def reset(self, seed: int | None = None, options: dict | None = None) -> None:
        """Clear the wrapper's cached state, then reset the wrapped environment."""
        self._terminated = False
        self._prev_obs = None
        self._prev_info = None
        super().reset(seed=seed, options=options)

    def observe(self, agent: AgentID) -> ObsType | None:
        """Return the observation for ``agent``.

        When ``agent`` is the currently selected agent, the observation and
        that agent's info dict (``{}`` if it has no entry in ``infos``) are
        cached so that `step` can read the action mask from them.
        """
        obs = super().observe(agent)
        if agent == self.agent_selection:
            self._prev_obs = obs
            if self.agent_selection in self.infos:
                self._prev_info = self.infos[self.agent_selection]
            else:
                self._prev_info = {}
        return obs

    def _resolve_action_mask(self):
        """Return the action mask from the cached observation or info.

        A dictionary observation containing ``"action_mask"`` takes precedence;
        otherwise the cached info dict is consulted.

        Raises:
            AssertionError: if neither source provides ``"action_mask"``. Raised
                explicitly (rather than via ``assert``) so the check is not
                stripped when Python runs with ``-O``.
        """
        obs = self._prev_obs
        info = self._prev_info
        obs_is_dict = isinstance(obs, dict)
        if obs_is_dict and "action_mask" in obs:
            return obs["action_mask"]
        if info is not None and "action_mask" in info:
            return info["action_mask"]
        if obs_is_dict:
            raise AssertionError(
                f"`action_mask` not found in dictionary observation: {obs}. Action mask must either be in `observation['action_mask']` or `info['action_mask']` to use TerminateIllegalWrapper."
            )
        raise AssertionError(
            f"`action_mask` not found in info for non-dictionary observation: {info}. Action mask must either be in observation['action_mask'] or info['action_mask'] to use TerminateIllegalWrapper."
        )

    def step(self, action: ActionType) -> None:
        """Apply ``action`` for the selected agent, ending the game if it is illegal.

        Three cases are handled:

        * the game was already ended by an illegal move and the selected agent
          is terminated or truncated: the action is passed to the unwrapped
          env's ``_was_dead_step``;
        * the selected agent is neither terminated nor truncated and the action
          mask marks ``action`` as not allowed: every agent is flagged as
          terminated and truncated, all rewards are zeroed except the acting
          agent's, which becomes ``illegal_reward``;
        * otherwise the action is forwarded to the wrapped environment.

        Raises:
            AssertionError: if no action mask can be found in the observation
                or the info of the selected agent.
        """
        current_agent = self.agent_selection
        if self._prev_obs is None:
            # The caller did not observe before stepping; do it now so the
            # observation/info cache is populated.
            self.observe(current_agent)
        action_mask = self._resolve_action_mask()
        # The cache is only valid for a single step.
        self._prev_obs = None
        self._prev_info = None

        agent_done = (
            self.terminations[current_agent] or self.truncations[current_agent]
        )
        if self._terminated and agent_done:
            self.env.unwrapped._was_dead_step(action)
        elif not agent_done and not action_mask[action]:
            EnvLogger.warn_on_illegal_move()
            unwrapped = self.env.unwrapped
            unwrapped._cumulative_rewards[current_agent] = 0
            unwrapped.terminations = {d: True for d in self.agents}
            unwrapped.truncations = {d: True for d in self.agents}
            unwrapped.rewards = {d: 0 for d in self.truncations}
            unwrapped.rewards[current_agent] = float(self._illegal_value)
            unwrapped._accumulate_rewards()
            unwrapped._deads_step_first()
            self._terminated = True
        else:
            super().step(action)

    def __str__(self) -> str:
        """Return the string representation of the wrapped environment."""
        return str(self.env)