Merged PR 310: Pull last-minute beta 7 changes into dev

## Summary
The step label for actions was off by one relative to the step label in the step metadata log. Also, the last episode was not outputting an action log because the reset method wasn't being called after it; a `close` method now writes the action log instead.

## Test process
Notebooks, pytests

## Checklist
- [~] PR is linked to a **work item**
- [~] **acceptance criteria** of linked ticket are met
- [y] performed **self-review** of the code
- [n] written **tests** for any new functionality added with this PR
- [n] updated the **documentation** if this PR changes or adds functionality
- [n] written/updated **design docs** if this PR implements new functionality
- [n] updated the **change log**
- [y] ran **pre-commit** checks for code style
- [y] attended to any **TO-DOs** left in the code

Related work items: #2230
This commit is contained in:
Marek Wolan
2024-03-18 10:39:01 +00:00
2 changed files with 27 additions and 9 deletions

View File

@@ -1 +1 @@
3.0.0b6
3.0.0b7

View File

@@ -47,6 +47,7 @@ class PrimaiteGymEnv(gymnasium.Env):
def step(self, action: ActType) -> Tuple[ObsType, SupportsFloat, bool, bool, Dict[str, Any]]:
"""Perform a step in the environment."""
# make ProxyAgent store the action chosen by the RL policy
step = self.game.step_counter
self.agent.store_action(action)
# apply_agent_actions accesses the action we just stored
self.game.apply_agent_actions()
@@ -62,18 +63,18 @@ class PrimaiteGymEnv(gymnasium.Env):
"agent_actions": {name: agent.action_history[-1] for name, agent in self.game.agents.items()}
} # tell us what all the agents did for convenience.
if self.game.save_step_metadata:
self._write_step_metadata_json(action, state, reward)
self._write_step_metadata_json(step, action, state, reward)
return next_obs, reward, terminated, truncated, info
def _write_step_metadata_json(self, action: int, state: Dict, reward: int):
def _write_step_metadata_json(self, step: int, action: int, state: Dict, reward: int):
output_dir = SIM_OUTPUT.path / f"episode_{self.episode_counter}" / "step_metadata"
output_dir.mkdir(parents=True, exist_ok=True)
path = output_dir / f"step_{self.game.step_counter}.json"
path = output_dir / f"step_{step}.json"
data = {
"episode": self.episode_counter,
"step": self.game.step_counter,
"step": step,
"action": int(action),
"reward": int(reward),
"state": state,
@@ -121,6 +122,12 @@ class PrimaiteGymEnv(gymnasium.Env):
else:
return self.agent.observation_manager.current_observation
def close(self):
"""Close the simulation."""
if self.io.settings.save_agent_actions:
all_agent_actions = {name: agent.action_history for name, agent in self.game.agents.items()}
self.io.write_agent_actions(agent_actions=all_agent_actions, episode=self.episode_counter)
class PrimaiteRayEnv(gymnasium.Env):
"""Ray wrapper that accepts a single `env_config` parameter in init function for compatibility with Ray."""
@@ -144,6 +151,10 @@ class PrimaiteRayEnv(gymnasium.Env):
"""Perform a step in the environment."""
return self.env.step(action)
def close(self):
"""Close the simulation."""
self.env.close()
class PrimaiteRayMARLEnv(MultiAgentEnv):
"""Ray Environment that inherits from MultiAgentEnv to allow training MARL systems."""
@@ -211,6 +222,7 @@ class PrimaiteRayMARLEnv(MultiAgentEnv):
identifier.
:rtype: Tuple[Dict[str,ObsType], Dict[str, SupportsFloat], Dict[str,bool], Dict[str,bool], Dict]
"""
step = self.game.step_counter
# 1. Perform actions
for agent_name, action in actions.items():
self.agents[agent_name].store_action(action)
@@ -232,18 +244,18 @@ class PrimaiteRayMARLEnv(MultiAgentEnv):
terminateds["__all__"] = len(self.terminateds) == len(self.agents)
truncateds["__all__"] = self.game.calculate_truncated()
if self.game.save_step_metadata:
self._write_step_metadata_json(actions, state, rewards)
self._write_step_metadata_json(step, actions, state, rewards)
return next_obs, rewards, terminateds, truncateds, infos
def _write_step_metadata_json(self, actions: Dict, state: Dict, rewards: Dict):
def _write_step_metadata_json(self, step: int, actions: Dict, state: Dict, rewards: Dict):
output_dir = SIM_OUTPUT.path / f"episode_{self.episode_counter}" / "step_metadata"
output_dir.mkdir(parents=True, exist_ok=True)
path = output_dir / f"step_{self.game.step_counter}.json"
path = output_dir / f"step_{step}.json"
data = {
"episode": self.episode_counter,
"step": self.game.step_counter,
"step": step,
"actions": {agent_name: int(action) for agent_name, action in actions.items()},
"reward": rewards,
"state": state,
@@ -260,3 +272,9 @@ class PrimaiteRayMARLEnv(MultiAgentEnv):
unflat_obs = agent.observation_manager.current_observation
obs[agent_name] = gymnasium.spaces.flatten(unflat_space, unflat_obs)
return obs
def close(self):
"""Close the simulation."""
if self.io.settings.save_agent_actions:
all_agent_actions = {name: agent.action_history for name, agent in self.game.agents.items()}
self.io.write_agent_actions(agent_actions=all_agent_actions, episode=self.episode_counter)