Fix multi-agent system

This commit is contained in:
Marek Wolan
2023-11-22 20:22:34 +00:00
parent 9070fb44d4
commit 1fd5298fc5
3 changed files with 1366 additions and 1 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,8 @@
from typing import Any, Dict, Optional, SupportsFloat, Tuple
from typing import Any, Dict, Final, Optional, SupportsFloat, Tuple
import gymnasium
from gymnasium.core import ActType, ObsType
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from primaite.game.agent.interface import ProxyAgent
from primaite.game.game import PrimaiteGame
@@ -86,3 +87,76 @@ class PrimaiteRayEnv(gymnasium.Env):
def step(self, action: ActType) -> Tuple[ObsType, SupportsFloat, bool, bool, Dict]:
    """Perform a step in the environment.

    Thin pass-through: delegates to the wrapped environment's ``step`` and
    returns its (obs, reward, terminated, truncated, info) tuple unchanged.

    :param action: The action to apply this timestep.
    :return: The wrapped environment's step result, relayed as-is.
    """
    return self.env.step(action)
class PrimaiteRayMARLEnv(MultiAgentEnv):
    """Ray Environment that inherits from MultiAgentEnv to allow training MARL systems."""

    def __init__(self, env_config: Optional[Dict] = None) -> None:
        """Initialise the environment.

        :param env_config: A dictionary containing the environment configuration. It must contain a single key, `game`
            which is the PrimaiteGame instance.
        :type env_config: Dict[str, PrimaiteGame]
        :raises ValueError: If ``env_config`` is missing or has no ``game`` entry.
        """
        # Fail fast with a clear message instead of an opaque TypeError/KeyError on subscript.
        if env_config is None or "game" not in env_config:
            raise ValueError("env_config must be a dict containing a 'game' key holding the PrimaiteGame instance.")
        # Reference to the primaite game.
        self.game: PrimaiteGame = env_config["game"]
        # Mapping of all possible agents in the environment, keyed by agent name. This mapping should not change!
        self.agents: Final[Dict[str, ProxyAgent]] = {agent.agent_name: agent for agent in self.game.rl_agents}
        # Ray's MultiAgentEnv uses _agent_ids to know which agent ids can appear in step/reset dicts.
        self._agent_ids = list(self.agents.keys())
        # Per-episode bookkeeping of which agents have terminated / been truncated.
        self.terminateds = set()
        self.truncateds = set()
        # Per-agent spaces keyed by agent name, as Ray expects for multi-agent training.
        self.observation_space = gymnasium.spaces.Dict(
            {name: agent.observation_manager.space for name, agent in self.agents.items()}
        )
        self.action_space = gymnasium.spaces.Dict(
            {name: agent.action_manager.space for name, agent in self.agents.items()}
        )
        super().__init__()

    def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[ObsType, Dict]:
        """Reset the environment.

        :param seed: Currently unused; accepted for gymnasium API compatibility.
        :param options: Currently unused; accepted for gymnasium API compatibility.
        :return: Tuple of (per-agent observations keyed by agent name, info dict).
        """
        # Clear per-episode bookkeeping so a stale set from a previous episode cannot leak into '__all__'.
        self.terminateds = set()
        self.truncateds = set()
        self.game.reset()
        state = self.game.get_sim_state()
        self.game.update_agents(state)
        next_obs = self._get_obs()
        info: Dict = {}
        return next_obs, info

    def step(
        self, actions: Dict[str, ActType]
    ) -> Tuple[Dict[str, ObsType], Dict[str, SupportsFloat], Dict[str, bool], Dict[str, bool], Dict]:
        """Perform a step in the environment. Adherent to Ray MultiAgentEnv step API.

        :param actions: Dict of actions. The key is agent identifier and the value is a gymnasium action instance.
        :type actions: Dict[str, ActType]
        :return: Observations, rewards, terminateds, truncateds, and info. Each one is a dictionary keyed by agent
            identifier.
        :rtype: Tuple[Dict[str,ObsType], Dict[str, SupportsFloat], Dict[str,bool], Dict[str,bool], Dict]
        """
        # 1. Perform actions
        for agent_name, action in actions.items():
            self.agents[agent_name].store_action(action)
        self.game.apply_agent_actions()
        # 2. Advance timestep
        self.game.advance_timestep()
        # 3. Get next observations
        state = self.game.get_sim_state()
        self.game.update_agents(state)
        next_obs = self._get_obs()
        # 4. Get rewards
        rewards = {name: agent.reward_function.current_reward for name, agent in self.agents.items()}
        # No per-agent termination condition exists yet; episodes currently end via truncation only.
        terminateds = {name: False for name in self.agents}
        # Truncation is global to the game — compute it once instead of once per agent plus once for '__all__'.
        truncated = self.game.calculate_truncated()
        truncateds = {name: truncated for name in self.agents}
        infos: Dict = {}
        # NOTE(review): self.terminateds is never populated, so '__all__' is False whenever agents exist —
        # preserved as-is; populate self.terminateds when per-agent termination is introduced.
        terminateds["__all__"] = len(self.terminateds) == len(self.agents)
        truncateds["__all__"] = truncated
        return next_obs, rewards, terminateds, truncateds, infos

    def _get_obs(self) -> Dict[str, ObsType]:
        """Return the current observation for every agent, keyed by agent name."""
        return {name: agent.observation_manager.current_observation for name, agent in self.agents.items()}

View File

@@ -0,0 +1,127 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from primaite.game.game import PrimaiteGame\n",
"import yaml\n",
"from primaite.config.load import example_config_path\n",
"\n",
"from primaite.game.environment import PrimaiteRayEnv"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open(example_config_path(), 'r') as f:\n",
" cfg = yaml.safe_load(f)\n",
"\n",
"game = PrimaiteGame.from_config(cfg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# gym = PrimaiteRayEnv({\"game\":game})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import ray\n",
"from ray import air, tune\n",
"from ray.rllib.algorithms.ppo import PPOConfig"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ray.shutdown()\n",
"ray.init()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from primaite.game.environment import PrimaiteRayMARLEnv\n",
"\n",
"\n",
"env_config = {\"game\":game}\n",
"config = (\n",
" PPOConfig()\n",
" .environment(env=PrimaiteRayMARLEnv, env_config={\"game\":game})\n",
" .rollouts(num_rollout_workers=0)\n",
" .multi_agent(\n",
" policies={agent.agent_name for agent in game.rl_agents},\n",
" policy_mapping_fn=lambda agent_id, episode, worker, **kw: agent_id,\n",
" )\n",
" .training(train_batch_size=128)\n",
" )\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tune.Tuner(\n",
" \"PPO\",\n",
" run_config=air.RunConfig(\n",
" stop={\"training_iteration\": 128},\n",
" checkpoint_config=air.CheckpointConfig(\n",
" checkpoint_frequency=10,\n",
" ),\n",
" ),\n",
" param_space=config\n",
").fit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}