Populate step info in environment, and finish notebook

2024-01-25 12:04:09 +00:00
parent 0a65f32adf
commit 28acb5dcae
11 changed files with 850 additions and 215 deletions
--- a/src/primaite/config/_package_data/example_config.yaml
+++ b/src/primaite/config/_package_data/example_config.yaml
@@ -31,7 +31,7 @@ game:
  - UDP

 agents:
-  - ref: client_1_green_user
+  - ref: client_2_green_user
    team: GREEN
    type: GreenWebBrowsingAgent
    observation_space:
--- a/src/primaite/config/_package_data/example_config_2_rl_agents.yaml
+++ b/src/primaite/config/_package_data/example_config_2_rl_agents.yaml
@@ -25,7 +25,7 @@ game:
  - UDP

 agents:
-  - ref: client_1_green_user
+  - ref: client_2_green_user
    team: GREEN
    type: GreenWebBrowsingAgent
    observation_space:
--- a/src/primaite/game/game.py
+++ b/src/primaite/game/game.py
@@ -113,7 +113,7 @@ class PrimaiteGame:
        self.update_agents(sim_state)

        # Apply all actions to simulation as requests
-        self.apply_agent_actions()
+        agent_actions = self.apply_agent_actions()  # noqa

        # Advance timestep
        self.advance_timestep()
@@ -131,12 +131,15 @@ class PrimaiteGame:

    def apply_agent_actions(self) -> None:
        """Apply all actions to simulation as requests."""
+        agent_actions = {}
        for agent in self.agents:
            obs = agent.observation_manager.current_observation
            rew = agent.reward_function.current_reward
            action_choice, options = agent.get_action(obs, rew)
+            agent_actions[agent.agent_name] = (action_choice, options)
            request = agent.format_request(action_choice, options)
            self.simulation.apply_request(request)
+        return agent_actions

    def advance_timestep(self) -> None:
        """Advance timestep."""
--- a/src/primaite/notebooks/_package_data/uc2_network.png
+++ b/src/primaite/notebooks/_package_data/uc2_network.png
--- a/src/primaite/notebooks/uc2_demo.ipynb
+++ b/src/primaite/notebooks/uc2_demo.ipynb
--- a/src/primaite/session/environment.py
+++ b/src/primaite/session/environment.py
@@ -29,7 +29,7 @@ class PrimaiteGymEnv(gymnasium.Env):
-        # make ProxyAgent store the action chosen my the RL policy
+        # make ProxyAgent store the action chosen by the RL policy
        self.agent.store_action(action)
        # apply_agent_actions accesses the action we just stored
-        self.game.apply_agent_actions()
+        agent_actions = self.game.apply_agent_actions()
        self.game.advance_timestep()
        state = self.game.get_sim_state()

@@ -39,7 +39,7 @@ class PrimaiteGymEnv(gymnasium.Env):
        reward = self.agent.reward_function.current_reward
        terminated = False
        truncated = self.game.calculate_truncated()
-        info = {}
+        info = {"agent_actions": agent_actions}  # tell us what all the agents did for convenience.
        if self.game.save_step_metadata:
            self._write_step_metadata_json(action, state, reward)
        return next_obs, reward, terminated, truncated, info
@@ -172,7 +172,7 @@ class PrimaiteRayMARLEnv(MultiAgentEnv):
        # 1. Perform actions
        for agent_name, action in actions.items():
            self.agents[agent_name].store_action(action)
-        self.game.apply_agent_actions()
+        agent_actions = self.game.apply_agent_actions()

        # 2. Advance timestep
        self.game.advance_timestep()
@@ -186,7 +186,7 @@ class PrimaiteRayMARLEnv(MultiAgentEnv):
        rewards = {name: agent.reward_function.current_reward for name, agent in self.agents.items()}
        terminateds = {name: False for name, _ in self.agents.items()}
        truncateds = {name: self.game.calculate_truncated() for name, _ in self.agents.items()}
-        infos = {}
+        infos = {"agent_actions": agent_actions}
        terminateds["__all__"] = len(self.terminateds) == len(self.agents)
        truncateds["__all__"] = self.game.calculate_truncated()
        if self.game.save_step_metadata:
--- a/tests/assets/configs/bad_primaite_session.yaml
+++ b/tests/assets/configs/bad_primaite_session.yaml
@@ -19,7 +19,7 @@ game:
  - UDP

 agents:
-  - ref: client_1_green_user
+  - ref: client_2_green_user
    team: GREEN
    type: GreenWebBrowsingAgent
    observation_space:
--- a/tests/assets/configs/eval_only_primaite_session.yaml
+++ b/tests/assets/configs/eval_only_primaite_session.yaml
@@ -23,7 +23,7 @@ game:
  - UDP

 agents:
-  - ref: client_1_green_user
+  - ref: client_2_green_user
    team: GREEN
    type: GreenWebBrowsingAgent
    observation_space:
--- a/tests/assets/configs/multi_agent_session.yaml
+++ b/tests/assets/configs/multi_agent_session.yaml
@@ -29,7 +29,7 @@ game:
  - UDP

 agents:
-  - ref: client_1_green_user
+  - ref: client_2_green_user
    team: GREEN
    type: GreenWebBrowsingAgent
    observation_space:
--- a/tests/assets/configs/test_primaite_session.yaml
+++ b/tests/assets/configs/test_primaite_session.yaml
@@ -27,7 +27,7 @@ game:
  - UDP

 agents:
-  - ref: client_1_green_user
+  - ref: client_2_green_user
    team: GREEN
    type: GreenWebBrowsingAgent
    observation_space:
--- a/tests/assets/configs/train_only_primaite_session.yaml
+++ b/tests/assets/configs/train_only_primaite_session.yaml
@@ -23,7 +23,7 @@ game:
  - UDP

 agents:
-  - ref: client_1_green_user
+  - ref: client_2_green_user
    team: GREEN
    type: GreenWebBrowsingAgent
    observation_space: