# (WIP) Creating Environments: Environment Logic

## Introduction

Now that we have a basic understanding of the structure of environment repositories, we can start thinking about the fun part - environment logic!

For this tutorial, we will be creating a two-player game consisting of a prisoner, trying to escape, and a guard, trying to catch the prisoner. This game will be played on a 7x7 grid, where:

• the prisoner starts in the top left corner,

• the guard starts in the bottom right corner,

• the escape door is randomly placed in the middle of the grid, and

• both the prisoner and the guard can move in any of the four cardinal directions (up, down, left, right).

## Code

/custom-environment/env/custom_environment.py
```python
import functools
import random
from copy import copy

import numpy as np
from gymnasium.spaces import Discrete, MultiDiscrete

from pettingzoo.utils.env import ParallelEnv

class CustomEnvironment(ParallelEnv):
    """Two-player chase game played on a square grid.

    The "prisoner" tries to reach the escape door while the "guard" tries to
    catch the prisoner. Both agents act simultaneously, one cell per step, in
    one of four directions. Every agent receives the same observation: a
    3-tuple of flattened cell indices ``x + GRID_SIZE * y`` for the prisoner,
    the guard and the escape door, in that order.
    """

    # Side length of the square grid; valid coordinates are 0..GRID_SIZE - 1.
    GRID_SIZE = 7
    # The episode is truncated on the step where ``timestep`` exceeds this.
    MAX_TIMESTEPS = 100

    def __init__(self):
        """Declare the state attributes; real values are assigned by ``reset``."""
        self.escape_y = None
        self.escape_x = None
        self.guard_y = None
        self.guard_x = None
        self.prisoner_y = None
        self.prisoner_x = None
        self.timestep = None
        self.possible_agents = ["prisoner", "guard"]
        # Source of randomness for the door position. Defaults to the global
        # ``random`` module and is swapped for a private generator when
        # ``reset`` receives a seed.
        self._rng = random

    def reset(self, seed=None, options=None):
        """Start a new episode and return the initial observations.

        Args:
            seed: Optional seed. When given, door placement for this and
                later resets is drawn from a generator seeded with it, so
                episodes are reproducible. When ``None``, the current
                generator keeps being used.
            options: Accepted for API compatibility; not used.

        Returns:
            Dict mapping each agent name to its observation tuple.
        """
        if seed is not None:
            self._rng = random.Random(seed)

        self.agents = copy(self.possible_agents)
        self.timestep = 0

        # Prisoner starts in the top-left corner.
        self.prisoner_x = 0
        self.prisoner_y = 0

        # Guard starts in the bottom-right corner, i.e. the last valid index
        # on each axis (GRID_SIZE itself would be off the grid).
        last = self.GRID_SIZE - 1
        self.guard_x = last
        self.guard_y = last

        # Door is placed away from the border (2..5 on the 7x7 grid).
        self.escape_x = self._rng.randint(2, self.GRID_SIZE - 2)
        self.escape_y = self._rng.randint(2, self.GRID_SIZE - 2)

        # NOTE(review): recent PettingZoo releases expect reset() to return
        # ``(observations, infos)``. The return shape is left unchanged here;
        # confirm against the installed PettingZoo version.
        return self._observations()

    def step(self, actions):
        """Advance the game by one simultaneous move of both agents.

        Args:
            actions: Dict with keys ``"prisoner"`` and ``"guard"``; each value
                is 0 (x - 1), 1 (x + 1), 2 (y - 1) or 3 (y + 1). A move that
                would leave the grid, or any other value, leaves the agent in
                place.

        Returns:
            Tuple ``(observations, rewards, terminations, truncations,
            infos)``, each a dict keyed by agent name. When the episode ends
            on this step ``self.agents`` is emptied afterwards, but the
            returned dicts still contain both agents.
        """
        # Execute actions
        self.prisoner_x, self.prisoner_y = self._move(
            self.prisoner_x, self.prisoner_y, actions["prisoner"]
        )
        self.guard_x, self.guard_y = self._move(
            self.guard_x, self.guard_y, actions["guard"]
        )

        # Check termination conditions
        terminations = {a: False for a in self.agents}
        rewards = {a: 0 for a in self.agents}
        prisoner_pos = (self.prisoner_x, self.prisoner_y)
        if prisoner_pos == (self.guard_x, self.guard_y):
            # Capture is checked first, so it wins over a simultaneous escape.
            rewards = {"prisoner": -1, "guard": 1}
            terminations = {a: True for a in self.agents}
        elif prisoner_pos == (self.escape_x, self.escape_y):
            rewards = {"prisoner": 1, "guard": -1}
            terminations = {a: True for a in self.agents}

        # Check truncation conditions (overwrites termination rewards)
        truncations = {a: False for a in self.agents}
        if self.timestep > self.MAX_TIMESTEPS:
            rewards = {"prisoner": 0, "guard": 0}
            truncations = {"prisoner": True, "guard": True}
        self.timestep += 1

        # Build the outputs while the agent list is still populated, so the
        # final step reports observations and infos for both agents.
        observations = self._observations()

        # Get dummy infos (not used in this example)
        infos = {a: {} for a in self.agents}

        # The episode is over on termination as well as on truncation.
        if any(terminations.values()) or all(truncations.values()):
            self.agents = []

        return observations, rewards, terminations, truncations, infos

    def render(self):
        """Print the grid with ``P`` (prisoner), ``G`` (guard) and ``E`` (door).

        Markers are written in that order, so a later one overwrites an
        earlier one occupying the same cell.
        """
        # A string-typed array is required: a float array cannot hold the
        # letter markers.
        grid = np.full((self.GRID_SIZE, self.GRID_SIZE), " ")
        grid[self.prisoner_y, self.prisoner_x] = "P"
        grid[self.guard_y, self.guard_x] = "G"
        grid[self.escape_y, self.escape_x] = "E"
        print(f"{grid} \n")

    # Cached so repeated calls for the same agent return the same space object.
    @functools.lru_cache(maxsize=None)
    def observation_space(self, agent):
        """Return the observation space: three flattened cell indices."""
        # MultiDiscrete([n]) admits the values 0..n-1, so n must be the number
        # of cells for the last cell index (GRID_SIZE**2 - 1) to be valid.
        return MultiDiscrete([self.GRID_SIZE * self.GRID_SIZE] * 3)

    @functools.lru_cache(maxsize=None)
    def action_space(self, agent):
        """Return the action space: four movement directions (0..3)."""
        return Discrete(4)

    def _move(self, x, y, action):
        """Return the position reached from ``(x, y)`` by ``action``, clamped to the grid."""
        last = self.GRID_SIZE - 1
        if action == 0 and x > 0:
            x -= 1
        elif action == 1 and x < last:
            x += 1
        elif action == 2 and y > 0:
            y -= 1
        elif action == 3 and y < last:
            y += 1
        return x, y

    def _observations(self):
        """Return the shared observation tuple for every agent in ``self.agents``."""
        size = self.GRID_SIZE
        observation = (
            self.prisoner_x + size * self.prisoner_y,
            self.guard_x + size * self.guard_y,
            self.escape_x + size * self.escape_y,
        )
        return {a: observation for a in self.agents}
```