Populate step info in environment, and finish notebook

This commit is contained in:
Marek Wolan
2024-01-25 12:04:09 +00:00
parent 0a65f32adf
commit 28acb5dcae
11 changed files with 850 additions and 215 deletions

View File

@@ -31,7 +31,7 @@ game:
- UDP
agents:
- ref: client_1_green_user
- ref: client_2_green_user
team: GREEN
type: GreenWebBrowsingAgent
observation_space:

View File

@@ -25,7 +25,7 @@ game:
- UDP
agents:
- ref: client_1_green_user
- ref: client_2_green_user
team: GREEN
type: GreenWebBrowsingAgent
observation_space:

View File

@@ -113,7 +113,7 @@ class PrimaiteGame:
self.update_agents(sim_state)
# Apply all actions to simulation as requests
self.apply_agent_actions()
agent_actions = self.apply_agent_actions() # noqa
# Advance timestep
self.advance_timestep()
@@ -131,12 +131,15 @@ class PrimaiteGame:
def apply_agent_actions(self) -> dict:
    """Apply all actions to simulation as requests.

    For every agent in ``self.agents``, the agent's current observation and
    current reward are passed to ``agent.get_action``; the chosen action is
    formatted with ``agent.format_request`` and submitted to the simulation
    via ``self.simulation.apply_request``.

    :return: Mapping of ``agent.agent_name`` to the ``(action_choice, options)``
        tuple that agent selected this step. Empty when there are no agents.
    """
    agent_actions = {}
    for agent in self.agents:
        obs = agent.observation_manager.current_observation
        rew = agent.reward_function.current_reward
        action_choice, options = agent.get_action(obs, rew)
        # Record the choice before applying it so callers (e.g. the
        # environment's ``info`` dict) can report what every agent did.
        agent_actions[agent.agent_name] = (action_choice, options)
        request = agent.format_request(action_choice, options)
        self.simulation.apply_request(request)
    return agent_actions
def advance_timestep(self) -> None:
"""Advance timestep."""

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

File diff suppressed because it is too large Load Diff

View File

@@ -29,7 +29,7 @@ class PrimaiteGymEnv(gymnasium.Env):
# make ProxyAgent store the action chosen by the RL policy
self.agent.store_action(action)
# apply_agent_actions accesses the action we just stored
self.game.apply_agent_actions()
agent_actions = self.game.apply_agent_actions()
self.game.advance_timestep()
state = self.game.get_sim_state()
@@ -39,7 +39,7 @@ class PrimaiteGymEnv(gymnasium.Env):
reward = self.agent.reward_function.current_reward
terminated = False
truncated = self.game.calculate_truncated()
info = {}
info = {"agent_actions": agent_actions} # tell us what all the agents did for convenience.
if self.game.save_step_metadata:
self._write_step_metadata_json(action, state, reward)
return next_obs, reward, terminated, truncated, info
@@ -172,7 +172,7 @@ class PrimaiteRayMARLEnv(MultiAgentEnv):
# 1. Perform actions
for agent_name, action in actions.items():
self.agents[agent_name].store_action(action)
self.game.apply_agent_actions()
agent_actions = self.game.apply_agent_actions()
# 2. Advance timestep
self.game.advance_timestep()
@@ -186,7 +186,7 @@ class PrimaiteRayMARLEnv(MultiAgentEnv):
rewards = {name: agent.reward_function.current_reward for name, agent in self.agents.items()}
terminateds = {name: False for name, _ in self.agents.items()}
truncateds = {name: self.game.calculate_truncated() for name, _ in self.agents.items()}
infos = {}
infos = {"agent_actions": agent_actions}
terminateds["__all__"] = len(self.terminateds) == len(self.agents)
truncateds["__all__"] = self.game.calculate_truncated()
if self.game.save_step_metadata:

View File

@@ -19,7 +19,7 @@ game:
- UDP
agents:
- ref: client_1_green_user
- ref: client_2_green_user
team: GREEN
type: GreenWebBrowsingAgent
observation_space:

View File

@@ -23,7 +23,7 @@ game:
- UDP
agents:
- ref: client_1_green_user
- ref: client_2_green_user
team: GREEN
type: GreenWebBrowsingAgent
observation_space:

View File

@@ -29,7 +29,7 @@ game:
- UDP
agents:
- ref: client_1_green_user
- ref: client_2_green_user
team: GREEN
type: GreenWebBrowsingAgent
observation_space:

View File

@@ -27,7 +27,7 @@ game:
- UDP
agents:
- ref: client_1_green_user
- ref: client_2_green_user
team: GREEN
type: GreenWebBrowsingAgent
observation_space:

View File

@@ -23,7 +23,7 @@ game:
- UDP
agents:
- ref: client_1_green_user
- ref: client_2_green_user
team: GREEN
type: GreenWebBrowsingAgent
observation_space: