Merged PR 310: Pull last-minute beta 7 changes into dev

## Summary

The step label for actions was off by one from the step label in the step metadata log. Also, the last episode was not outputting an action log because the reset method was not being called at the end of the final episode (there is now a close method for this instead).

## Test process

Notebooks, pytests

## Checklist

- [~] PR is linked to a **work item**
- [~] **acceptance criteria** of linked ticket are met
- [y] performed **self-review** of the code
- [n] written **tests** for any new functionality added with this PR
- [n] updated the **documentation** if this PR changes or adds functionality
- [n] written/updated **design docs** if this PR implements new functionality
- [n] updated the **change log**
- [y] ran **pre-commit** checks for code style
- [y] attended to any **TO-DOs** left in the code

Related work items: #2230
2024-03-18 10:39:01 +00:00
parent d5fb2f2309 bb01619918
commit cb9c14c87e
2 changed files with 27 additions and 9 deletions
--- a/src/primaite/VERSION
+++ b/src/primaite/VERSION
@@ -1 +1 @@
-3.0.0b6
+3.0.0b7
--- a/src/primaite/session/environment.py
+++ b/src/primaite/session/environment.py
@@ -47,6 +47,7 @@ class PrimaiteGymEnv(gymnasium.Env):
    def step(self, action: ActType) -> Tuple[ObsType, SupportsFloat, bool, bool, Dict[str, Any]]:
        """Perform a step in the environment."""
        # make ProxyAgent store the action chosen my the RL policy
+        step = self.game.step_counter
        self.agent.store_action(action)
        # apply_agent_actions accesses the action we just stored
        self.game.apply_agent_actions()
@@ -62,18 +63,18 @@ class PrimaiteGymEnv(gymnasium.Env):
            "agent_actions": {name: agent.action_history[-1] for name, agent in self.game.agents.items()}
        }  # tell us what all the agents did for convenience.
        if self.game.save_step_metadata:
-            self._write_step_metadata_json(action, state, reward)
+            self._write_step_metadata_json(step, action, state, reward)
        return next_obs, reward, terminated, truncated, info

-    def _write_step_metadata_json(self, action: int, state: Dict, reward: int):
+    def _write_step_metadata_json(self, step: int, action: int, state: Dict, reward: int):
        output_dir = SIM_OUTPUT.path / f"episode_{self.episode_counter}" / "step_metadata"

        output_dir.mkdir(parents=True, exist_ok=True)
-        path = output_dir / f"step_{self.game.step_counter}.json"
+        path = output_dir / f"step_{step}.json"

        data = {
            "episode": self.episode_counter,
-            "step": self.game.step_counter,
+            "step": step,
            "action": int(action),
            "reward": int(reward),
            "state": state,
@@ -121,6 +122,12 @@ class PrimaiteGymEnv(gymnasium.Env):
        else:
            return self.agent.observation_manager.current_observation

+    def close(self):
+        """Close the simulation."""
+        if self.io.settings.save_agent_actions:
+            all_agent_actions = {name: agent.action_history for name, agent in self.game.agents.items()}
+            self.io.write_agent_actions(agent_actions=all_agent_actions, episode=self.episode_counter)
+

 class PrimaiteRayEnv(gymnasium.Env):
    """Ray wrapper that accepts a single `env_config` parameter in init function for compatibility with Ray."""
@@ -144,6 +151,10 @@ class PrimaiteRayEnv(gymnasium.Env):
        """Perform a step in the environment."""
        return self.env.step(action)

+    def close(self):
+        """Close the simulation."""
+        self.env.close()
+

 class PrimaiteRayMARLEnv(MultiAgentEnv):
    """Ray Environment that inherits from MultiAgentEnv to allow training MARL systems."""
@@ -211,6 +222,7 @@ class PrimaiteRayMARLEnv(MultiAgentEnv):
            identifier.
        :rtype: Tuple[Dict[str,ObsType], Dict[str, SupportsFloat], Dict[str,bool], Dict[str,bool], Dict]
        """
+        step = self.game.step_counter
        # 1. Perform actions
        for agent_name, action in actions.items():
            self.agents[agent_name].store_action(action)
@@ -232,18 +244,18 @@ class PrimaiteRayMARLEnv(MultiAgentEnv):
        terminateds["__all__"] = len(self.terminateds) == len(self.agents)
        truncateds["__all__"] = self.game.calculate_truncated()
        if self.game.save_step_metadata:
-            self._write_step_metadata_json(actions, state, rewards)
+            self._write_step_metadata_json(step, actions, state, rewards)
        return next_obs, rewards, terminateds, truncateds, infos

-    def _write_step_metadata_json(self, actions: Dict, state: Dict, rewards: Dict):
+    def _write_step_metadata_json(self, step: int, actions: Dict, state: Dict, rewards: Dict):
        output_dir = SIM_OUTPUT.path / f"episode_{self.episode_counter}" / "step_metadata"

        output_dir.mkdir(parents=True, exist_ok=True)
-        path = output_dir / f"step_{self.game.step_counter}.json"
+        path = output_dir / f"step_{step}.json"

        data = {
            "episode": self.episode_counter,
-            "step": self.game.step_counter,
+            "step": step,
            "actions": {agent_name: int(action) for agent_name, action in actions.items()},
            "reward": rewards,
            "state": state,
@@ -260,3 +272,9 @@ class PrimaiteRayMARLEnv(MultiAgentEnv):
            unflat_obs = agent.observation_manager.current_observation
            obs[agent_name] = gymnasium.spaces.flatten(unflat_space, unflat_obs)
        return obs
+
+    def close(self):
+        """Close the simulation."""
+        if self.io.settings.save_agent_actions:
+            all_agent_actions = {name: agent.action_history for name, agent in self.game.agents.items()}
+            self.io.write_agent_actions(agent_actions=all_agent_actions, episode=self.episode_counter)
@@ -1 +1 @@
 .0.0b6
 .0.0b7