(WIP) Creating Environments: Environment Logic
Introduction
Now that we have a basic understanding of the structure of environment repositories, we can start thinking about the fun part - environment logic!
For this tutorial, we will be creating a two-player game consisting of a prisoner, trying to escape, and a guard, trying to catch the prisoner. This game will be played on a 7x7 grid, where:
the prisoner starts in the top left corner,
the guard starts in the bottom right corner,
the escape door is placed on a randomly chosen cell in the central region of the grid, and
both the prisoner and the guard can move in any of the four cardinal directions (up, down, left, right).
Code
/custom-environment/env/custom_environment.py
import functools
import random
from copy import copy
import numpy as np
from gymnasium.spaces import Discrete, MultiDiscrete
from pettingzoo.utils.env import ParallelEnv
class CustomEnvironment(ParallelEnv):
    """Two-player grid game: a prisoner tries to reach an escape door before a guard catches them.

    The board is a GRID_SIZE x GRID_SIZE grid. Coordinates run from 0 to
    GRID_SIZE - 1 on each axis, with (0, 0) being the top-left cell when the
    grid is rendered (rows are y, columns are x).

    Actions (identical for both agents):
        0 -> x - 1 (left), 1 -> x + 1 (right), 2 -> y - 1 (up), 3 -> y + 1 (down).
        A move that would leave the grid is ignored and the agent stays put.

    Observations (identical for both agents) are a 3-tuple of flattened cell
    indices ``x + GRID_SIZE * y`` for the prisoner, the guard and the escape
    door, in that order.
    """

    # Side length of the square grid; valid coordinates are 0 .. GRID_SIZE - 1.
    GRID_SIZE = 7
    # The episode is truncated once more than this many steps have already
    # been completed before the current one.
    MAX_TIMESTEPS = 100

    def __init__(self):
        """Declare all state; real values are assigned by ``reset``."""
        self.escape_y = None
        self.escape_x = None
        self.guard_y = None
        self.guard_x = None
        self.prisoner_y = None
        self.prisoner_x = None
        self.timestep = None
        self.possible_agents = ["prisoner", "guard"]

    def _observations(self):
        """Return the shared observation tuple keyed by every live agent."""
        size = self.GRID_SIZE
        encoded = (
            self.prisoner_x + size * self.prisoner_y,
            self.guard_x + size * self.guard_y,
            self.escape_x + size * self.escape_y,
        )
        return {agent: encoded for agent in self.agents}

    def _move(self, x, y, action):
        """Return ``(x, y)`` after applying ``action``, staying put at the grid edge."""
        last = self.GRID_SIZE - 1
        if action == 0 and x > 0:
            x -= 1
        elif action == 1 and x < last:
            x += 1
        elif action == 2 and y > 0:
            y -= 1
        elif action == 3 and y < last:
            y += 1
        return x, y

    def reset(self, seed=None, return_info=False, options=None):
        """Start a new episode and return the initial observations.

        Args:
            seed: If given, the escape door position is drawn from a private
                ``random.Random(seed)`` so the layout is reproducible. If
                ``None``, the module-level ``random`` generator is used.
            return_info: If true, return ``(observations, infos)`` instead of
                just ``observations``.
            options: Accepted for signature compatibility; not used.

        Returns:
            A dict mapping each agent name to its observation tuple, or a
            ``(observations, infos)`` pair when ``return_info`` is true.
        """
        self.agents = copy(self.possible_agents)
        self.timestep = 0

        # Prisoner in the top-left corner, guard in the bottom-right corner.
        # The last valid index is GRID_SIZE - 1, not GRID_SIZE.
        last = self.GRID_SIZE - 1
        self.prisoner_x = 0
        self.prisoner_y = 0
        self.guard_x = last
        self.guard_y = last

        # Door cell is drawn from 2..5 on each axis, i.e. the central region
        # of the default 7x7 grid (revisit this range if GRID_SIZE changes).
        rng = random if seed is None else random.Random(seed)
        self.escape_x = rng.randint(2, 5)
        self.escape_y = rng.randint(2, 5)

        observations = self._observations()
        if return_info:
            return observations, {agent: {} for agent in self.agents}
        return observations

    def step(self, actions):
        """Advance the game by one simultaneous move of both agents.

        Args:
            actions: Dict with keys ``"prisoner"`` and ``"guard"`` whose values
                are action codes 0-3 (any other value is a no-op).

        Returns:
            ``(observations, rewards, terminations, truncations, infos)``, each
            a dict keyed by agent name.

        Raises:
            KeyError: If ``actions`` lacks an entry for either agent.
        """
        # Execute actions
        prisoner_action = actions["prisoner"]
        guard_action = actions["guard"]
        self.prisoner_x, self.prisoner_y = self._move(
            self.prisoner_x, self.prisoner_y, prisoner_action
        )
        self.guard_x, self.guard_y = self._move(
            self.guard_x, self.guard_y, guard_action
        )

        # Check termination conditions; being caught takes precedence over
        # reaching the door when both happen on the same step.
        terminations = {a: False for a in self.agents}
        rewards = {a: 0 for a in self.agents}
        if self.prisoner_x == self.guard_x and self.prisoner_y == self.guard_y:
            rewards = {"prisoner": -1, "guard": 1}
            terminations = {a: True for a in self.agents}
        elif self.prisoner_x == self.escape_x and self.prisoner_y == self.escape_y:
            rewards = {"prisoner": 1, "guard": -1}
            terminations = {a: True for a in self.agents}

        # Check truncation conditions (overwrites the rewards set above).
        # The counter is tested before it is incremented, so it holds the
        # number of steps completed before this one.
        truncations = {a: False for a in self.agents}
        if self.timestep > self.MAX_TIMESTEPS:
            rewards = {"prisoner": 0, "guard": 0}
            truncations = {"prisoner": True, "guard": True}
        self.timestep += 1

        # Get observations
        observations = self._observations()

        # Get dummy infos (not used in this example)
        infos = {a: {} for a in self.agents}

        # Once the episode is over no agent remains live. This is done last
        # so the dicts above are still keyed by both agents.
        if any(terminations.values()) or all(truncations.values()):
            self.agents = []

        return observations, rewards, terminations, truncations, infos

    def render(self):
        """Print the grid with P (prisoner), G (guard) and E (escape door).

        Later assignments win when two markers share a cell, so E is drawn
        over G, and G over P.
        """
        # A string-typed grid is required: single-character markers cannot be
        # stored in a float array.
        grid = np.full((self.GRID_SIZE, self.GRID_SIZE), " ")
        grid[self.prisoner_y, self.prisoner_x] = "P"
        grid[self.guard_y, self.guard_x] = "G"
        grid[self.escape_y, self.escape_x] = "E"
        print(f"{grid} \n")

    # Cached so that repeated calls for the same agent return the same space
    # object rather than a fresh one each time.
    @functools.lru_cache(maxsize=None)
    def observation_space(self, agent):
        """Return the observation space: three flattened cell indices.

        Each component takes GRID_SIZE * GRID_SIZE distinct values
        (0 .. GRID_SIZE**2 - 1), so the bottom-right cell is included.
        """
        return MultiDiscrete([self.GRID_SIZE * self.GRID_SIZE] * 3)

    @functools.lru_cache(maxsize=None)
    def action_space(self, agent):
        """Return the action space: four discrete moves (left, right, up, down)."""
        return Discrete(4)