Source code for pettingzoo.mpe.simple.simple

# noqa: D212, D415
"""
# Simple

```{figure} mpe_simple.gif
:width: 140px
:name: simple
```

This environment is part of the <a href='..'>MPE environments</a>. Please read that page first for general information.

| Import             | `from pettingzoo.mpe import simple_v3` |
|--------------------|----------------------------------------|
| Actions            | Discrete/Continuous                    |
| Parallel API       | Yes                                    |
| Manual Control     | No                                     |
| Agents             | `agents=[agent_0]`                     |
| Number of Agents   | 1                                      |
| Action Shape       | (5)                                    |
| Action Values      | Discrete(5)/Box(0.0, 1.0, (5,))        |
| Observation Shape  | (4)                                    |
| Observation Values | (-inf,inf)                             |
| State Shape        | (4,)                                   |
| State Values       | (-inf,inf)                             |


In this environment, a single agent sees the position of a landmark and is rewarded with the negative squared Euclidean distance to that landmark, so the reward rises toward zero as the agent approaches it. This is not a multi-agent environment; it is primarily intended for debugging.
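For example (the positions below are made up, chosen only to illustrate the reward computation implemented in `Scenario.reward` further down):

``` python
import numpy as np

# hypothetical agent and landmark positions
agent_pos = np.array([0.5, -0.2])
landmark_pos = np.array([0.1, 0.3])

# reward is the negative squared Euclidean distance
reward = -np.sum(np.square(agent_pos - landmark_pos))
print(reward)  # approximately -0.41
```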

Observation space: `[self_vel, landmark_rel_position]`
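With the default two-dimensional world, the 4-dimensional observation splits into two halves (a sketch; the vector below is a made-up sample):

``` python
import numpy as np

# hypothetical observation as returned by the environment
obs = np.array([0.0, 0.0, -0.4, 0.5])

self_vel = obs[:2]          # the agent's own velocity
landmark_rel_pos = obs[2:]  # landmark position in the agent's reference frame
```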

### Arguments

``` python
simple_v3.env(max_cycles=25, continuous_actions=False)
```



`max_cycles`: number of frames (a step for each agent) until the game terminates

`continuous_actions`: whether agent action spaces are discrete (default) or continuous
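A minimal random-action rollout using the standard PettingZoo AEC API (the seed is an arbitrary choice for illustration):

``` python
from pettingzoo.mpe import simple_v3

env = simple_v3.env(max_cycles=25)
env.reset(seed=42)

for agent in env.agent_iter():
    observation, reward, termination, truncation, info = env.last()
    if termination or truncation:
        action = None
    else:
        action = env.action_space(agent).sample()  # random policy placeholder
    env.step(action)
env.close()
```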

"""

import numpy as np
from gymnasium.utils import EzPickle

from pettingzoo.mpe._mpe_utils.core import Agent, Landmark, World
from pettingzoo.mpe._mpe_utils.scenario import BaseScenario
from pettingzoo.mpe._mpe_utils.simple_env import SimpleEnv, make_env
from pettingzoo.utils.conversions import parallel_wrapper_fn


class raw_env(SimpleEnv, EzPickle):
    def __init__(self, max_cycles=25, continuous_actions=False, render_mode=None):
        EzPickle.__init__(
            self,
            max_cycles=max_cycles,
            continuous_actions=continuous_actions,
            render_mode=render_mode,
        )
        scenario = Scenario()
        world = scenario.make_world()
        SimpleEnv.__init__(
            self,
            scenario=scenario,
            world=world,
            render_mode=render_mode,
            max_cycles=max_cycles,
            continuous_actions=continuous_actions,
        )
        self.metadata["name"] = "simple_v3"


env = make_env(raw_env)
parallel_env = parallel_wrapper_fn(env)


class Scenario(BaseScenario):
    def make_world(self):
        world = World()
        # add agents
        world.agents = [Agent() for i in range(1)]
        for i, agent in enumerate(world.agents):
            agent.name = f"agent_{i}"
            agent.collide = False
            agent.silent = True
        # add landmarks
        world.landmarks = [Landmark() for i in range(1)]
        for i, landmark in enumerate(world.landmarks):
            landmark.name = "landmark %d" % i
            landmark.collide = False
            landmark.movable = False
        return world

    def reset_world(self, world, np_random):
        # random properties for agents
        for i, agent in enumerate(world.agents):
            agent.color = np.array([0.25, 0.25, 0.25])
        # random properties for landmarks
        for i, landmark in enumerate(world.landmarks):
            landmark.color = np.array([0.75, 0.75, 0.75])
        world.landmarks[0].color = np.array([0.75, 0.25, 0.25])
        # set random initial states
        for agent in world.agents:
            agent.state.p_pos = np_random.uniform(-1, +1, world.dim_p)
            agent.state.p_vel = np.zeros(world.dim_p)
            agent.state.c = np.zeros(world.dim_c)
        for i, landmark in enumerate(world.landmarks):
            landmark.state.p_pos = np_random.uniform(-1, +1, world.dim_p)
            landmark.state.p_vel = np.zeros(world.dim_p)

    def reward(self, agent, world):
        dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
        return -dist2

    def observation(self, agent, world):
        # get positions of all entities in this agent's reference frame
        entity_pos = []
        for entity in world.landmarks:
            entity_pos.append(entity.state.p_pos - agent.state.p_pos)
        return np.concatenate([agent.state.p_vel] + entity_pos)
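Since `parallel_env` is exported above, the same environment can also be driven step-synchronously through the parallel API; a minimal sketch using random actions:

``` python
from pettingzoo.mpe import simple_v3

env = simple_v3.parallel_env(max_cycles=25)
observations, infos = env.reset(seed=42)

while env.agents:
    # one random action per live agent
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
    observations, rewards, terminations, truncations, infos = env.step(actions)
env.close()
```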