# noqa: D212, D415
"""
# Simple
```{figure} mpe_simple.gif
:width: 140px
:name: simple
```
This environment is part of the <a href='..'>MPE environments</a>. Please read that page first for general information.
| Import | `from pettingzoo.mpe import simple_v3` |
|--------------------|----------------------------------------|
| Actions | Discrete/Continuous |
| Parallel API | Yes |
| Manual Control | No |
| Agents | `agents= [agent_0]` |
| Number of Agents | 1 |
| Action Shape | (5) |
| Action Values | Discrete(5)/Box(0.0, 1.0, (5,)) |
| Observation Shape | (4) |
| Observation Values | (-inf,inf) |
| State Shape | (4,) |
| State Values | (-inf,inf) |
In this environment a single agent sees a landmark position and is rewarded based on how close it gets to the landmark (the reward is the negative squared Euclidean distance). This is not a multi-agent environment and is primarily intended for debugging purposes.
Observation space: `[self_vel, landmark_rel_position]`
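The observation is a flat 4-dimensional vector: the agent's own velocity (2 values) followed by the landmark's position relative to the agent (2 values). A minimal sketch of unpacking it after a reset (the variable names are illustrative, not part of the API):
``` python
from pettingzoo.mpe import simple_v3

env = simple_v3.env()
env.reset(seed=0)
obs = env.observe("agent_0")   # shape (4,)
self_vel = obs[0:2]            # agent's own velocity (x, y)
landmark_rel_pos = obs[2:4]    # landmark position relative to the agent
```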
### Arguments
``` python
simple_v3.env(max_cycles=25, continuous_actions=False)
```
`max_cycles`: number of frames (a step for each agent) until the game terminates
`continuous_actions`: whether agent action spaces are discrete (default) or continuous
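A minimal interaction loop with random actions, using the standard PettingZoo AEC API (a sketch for illustration, not part of this module):
``` python
from pettingzoo.mpe import simple_v3

env = simple_v3.env(max_cycles=25, continuous_actions=False)
env.reset(seed=42)
for agent in env.agent_iter():
    observation, reward, termination, truncation, info = env.last()
    if termination or truncation:
        action = None  # agent is done; step with None
    else:
        action = env.action_space(agent).sample()  # random policy, for illustration
    env.step(action)
env.close()
```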
"""
import numpy as np
from gymnasium.utils import EzPickle
from pettingzoo.mpe._mpe_utils.core import Agent, Landmark, World
from pettingzoo.mpe._mpe_utils.scenario import BaseScenario
from pettingzoo.mpe._mpe_utils.simple_env import SimpleEnv, make_env
from pettingzoo.utils.conversions import parallel_wrapper_fn
class raw_env(SimpleEnv, EzPickle):
def __init__(self, max_cycles=25, continuous_actions=False, render_mode=None):
EzPickle.__init__(
self,
max_cycles=max_cycles,
continuous_actions=continuous_actions,
render_mode=render_mode,
)
scenario = Scenario()
world = scenario.make_world()
SimpleEnv.__init__(
self,
scenario=scenario,
world=world,
render_mode=render_mode,
max_cycles=max_cycles,
continuous_actions=continuous_actions,
)
self.metadata["name"] = "simple_v3"
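
# Factory functions: `env` builds the wrapped AEC environment,
# `parallel_env` exposes the same scenario through the Parallel API.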
env = make_env(raw_env)
parallel_env = parallel_wrapper_fn(env)
class Scenario(BaseScenario):
def make_world(self):
world = World()
# add agents
world.agents = [Agent() for i in range(1)]
for i, agent in enumerate(world.agents):
agent.name = f"agent_{i}"
agent.collide = False
agent.silent = True
# add landmarks
world.landmarks = [Landmark() for i in range(1)]
for i, landmark in enumerate(world.landmarks):
landmark.name = "landmark %d" % i
landmark.collide = False
landmark.movable = False
return world
def reset_world(self, world, np_random):
# random properties for agents
for i, agent in enumerate(world.agents):
agent.color = np.array([0.25, 0.25, 0.25])
# random properties for landmarks
for i, landmark in enumerate(world.landmarks):
landmark.color = np.array([0.75, 0.75, 0.75])
world.landmarks[0].color = np.array([0.75, 0.25, 0.25])
# set random initial states
for agent in world.agents:
agent.state.p_pos = np_random.uniform(-1, +1, world.dim_p)
agent.state.p_vel = np.zeros(world.dim_p)
agent.state.c = np.zeros(world.dim_c)
for i, landmark in enumerate(world.landmarks):
landmark.state.p_pos = np_random.uniform(-1, +1, world.dim_p)
landmark.state.p_vel = np.zeros(world.dim_p)
def reward(self, agent, world):
dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
return -dist2
def observation(self, agent, world):
# get positions of all entities in this agent's reference frame
entity_pos = []
for entity in world.landmarks:
entity_pos.append(entity.state.p_pos - agent.state.p_pos)
return np.concatenate([agent.state.p_vel] + entity_pos)