Merge remote-tracking branch 'origin/dev' into bugfix/2151-service-status-set-to-overwhelmed-at-incorrect-actions

This commit is contained in:
Czar Echavez
2024-01-09 15:29:49 +00:00
12 changed files with 92 additions and 21 deletions

View File

@@ -6,7 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
- Made packet capture and system logging optional (off by default). To turn on, change the io_settings.save_pcap_logs and io_settings.save_sys_logs settings in the config.
- Made observation space flattening optional (on by default). To turn off for an agent, change the agent_settings.flatten_obs setting in the config.
### Added

View File

@@ -13,7 +13,25 @@ This section allows selecting which training framework and algorithm to use, and
``io_settings``
---------------
This section configures how the ``PrimaiteSession`` saves data.
This section configures how PrimAITE saves data during simulation and training.
**save_final_model**: Only used if training with PrimaiteSession, if true, the policy will be saved after the final training iteration.
**save_checkpoints**: Only used if training with PrimaiteSession, if true, the policy will be saved periodically during training.
**checkpoint_interval**: Only used if training with PrimaiteSession and if ``save_checkpoints`` is true. Defines how often to save the policy during training.
**save_logs**: *currently unused*.
**save_transactions**: *currently unused*.
**save_tensorboard_logs**: *currently unused*.
**save_step_metadata**: Whether to save the RL agents' action, environment state, and other data at every single step.
**save_pcap_logs**: Whether to save pcap files of all network traffic during the simulation.
**save_sys_logs**: Whether to save system logs from all nodes during the simulation.
``game``
--------
@@ -56,6 +74,10 @@ Description of configurable items:
**agent_settings**:
Settings passed to the agent during initialisation. These depend on the agent class.
Reinforcement learning agents use the ``ProxyAgent`` class, they accept these agent settings:
**flatten_obs**: If true, gymnasium flattening will be performed on the observation space before sending to the agent. Set this to true if your agent does not support nested observation spaces.
``simulation``
--------------
In this section the network layout is defined. This part of the config follows a hierarchical structure. Almost every component defines a ``ref`` field which acts as a human-readable unique identifier, used by other parts of the config, such as agents.

View File

@@ -14,6 +14,8 @@ io_settings:
save_checkpoints: true
checkpoint_interval: 5
save_step_metadata: false
save_pcap_logs: true
save_sys_logs: true
game:
@@ -523,7 +525,7 @@ agents:
agent_settings:
# ...
flatten_obs: true

View File

@@ -44,6 +44,8 @@ class AgentSettings(BaseModel):
start_settings: Optional[AgentStartSettings] = None
"Configuration for when an agent begins performing its actions"
flatten_obs: bool = True
"Whether to flatten the observation space before passing it to the agent. True by default."
@classmethod
def from_config(cls, config: Optional[Dict]) -> "AgentSettings":
@@ -166,6 +168,7 @@ class ProxyAgent(AbstractAgent):
action_space: Optional[ActionManager],
observation_space: Optional[ObservationManager],
reward_function: Optional[RewardFunction],
agent_settings: Optional[AgentSettings] = None,
) -> None:
super().__init__(
agent_name=agent_name,
@@ -174,6 +177,7 @@ class ProxyAgent(AbstractAgent):
reward_function=reward_function,
)
self.most_recent_action: ActType
self.flatten_obs: bool = agent_settings.flatten_obs if agent_settings else False
def get_action(self, obs: ObsType, reward: float = 0.0) -> Tuple[str, Dict]:
"""

View File

@@ -432,6 +432,7 @@ class PrimaiteGame:
action_space=action_space,
observation_space=obs_space,
reward_function=rew_function,
agent_settings=agent_settings,
)
game.agents.append(new_agent)
game.rl_agents.append(new_agent)

View File

@@ -39,6 +39,15 @@
"#### Create a Ray algorithm and pass it our config."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(cfg['agents'][2]['agent_settings'])"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -76,6 +85,13 @@
" param_space=config\n",
").fit()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

View File

@@ -81,13 +81,19 @@ class PrimaiteGymEnv(gymnasium.Env):
@property
def observation_space(self) -> gymnasium.Space:
"""Return the observation space of the environment."""
return gymnasium.spaces.flatten_space(self.agent.observation_manager.space)
if self.agent.flatten_obs:
return gymnasium.spaces.flatten_space(self.agent.observation_manager.space)
else:
return self.agent.observation_manager.space
def _get_obs(self) -> ObsType:
"""Return the current observation."""
unflat_space = self.agent.observation_manager.space
unflat_obs = self.agent.observation_manager.current_observation
return gymnasium.spaces.flatten(unflat_space, unflat_obs)
if not self.agent.flatten_obs:
return self.agent.observation_manager.current_observation
else:
unflat_space = self.agent.observation_manager.space
unflat_obs = self.agent.observation_manager.current_observation
return gymnasium.spaces.flatten(unflat_space, unflat_obs)
class PrimaiteRayEnv(gymnasium.Env):

View File

@@ -24,9 +24,13 @@ class SessionIOSettings(BaseModel):
save_transactions: bool = True
"""Whether to save transactions, If true, the session path will have a transactions folder."""
save_tensorboard_logs: bool = False
"""Whether to save tensorboard logs. If true, the session path will have a tenorboard_logs folder."""
"""Whether to save tensorboard logs. If true, the session path will have a tensorboard_logs folder."""
save_step_metadata: bool = False
"""Whether to save the RL agents' action, environment state, and other data at every single step."""
save_pcap_logs: bool = False
"""Whether to save PCAP logs."""
save_sys_logs: bool = False
"""Whether to save system logs."""
class SessionIO:
@@ -39,9 +43,10 @@ class SessionIO:
def __init__(self, settings: SessionIOSettings = SessionIOSettings()) -> None:
self.settings: SessionIOSettings = settings
self.session_path: Path = self.generate_session_path()
# set global SIM_OUTPUT path
SIM_OUTPUT.path = self.session_path / "simulation_output"
SIM_OUTPUT.save_pcap_logs = self.settings.save_pcap_logs
SIM_OUTPUT.save_sys_logs = self.settings.save_sys_logs
# warning TODO: must be careful not to re-initialise sessionIO because it will create a new path each time it's
# possible refactor needed

View File

@@ -54,7 +54,7 @@ class PrimaiteSession:
self.policy: PolicyABC
"""The reinforcement learning policy."""
self.io_manager = SessionIO()
self.io_manager: Optional["SessionIO"] = None
"""IO manager for the session."""
self.game: PrimaiteGame = game
@@ -101,9 +101,9 @@ class PrimaiteSession:
# CREATE ENVIRONMENT
if sess.training_options.rl_framework == "RLLIB_single_agent":
sess.env = PrimaiteRayEnv(env_config={"game": game})
sess.env = PrimaiteRayEnv(env_config={"cfg": cfg})
elif sess.training_options.rl_framework == "RLLIB_multi_agent":
sess.env = PrimaiteRayMARLEnv(env_config={"game": game})
sess.env = PrimaiteRayMARLEnv(env_config={"cfg": cfg})
elif sess.training_options.rl_framework == "SB3":
sess.env = PrimaiteGymEnv(game=game)

View File

@@ -7,11 +7,13 @@ from primaite import _PRIMAITE_ROOT
__all__ = ["SIM_OUTPUT"]
class __SimOutput:
class _SimOutput:
def __init__(self):
self._path: Path = (
_PRIMAITE_ROOT.parent.parent / "simulation_output" / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
)
self.save_pcap_logs: bool = False
self.save_sys_logs: bool = False
@property
def path(self) -> Path:
@@ -23,4 +25,4 @@ class __SimOutput:
self._path.mkdir(exist_ok=True, parents=True)
SIM_OUTPUT = __SimOutput()
SIM_OUTPUT = _SimOutput()

View File

@@ -41,6 +41,9 @@ class PacketCapture:
def setup_logger(self):
"""Set up the logger configuration."""
if not SIM_OUTPUT.save_pcap_logs:
return
log_path = self._get_log_path()
file_handler = logging.FileHandler(filename=log_path)
@@ -88,5 +91,6 @@ class PacketCapture:
:param frame: The PCAP frame to capture.
"""
msg = frame.model_dump_json()
self.logger.log(level=60, msg=msg) # Log at custom log level > CRITICAL
if SIM_OUTPUT.save_pcap_logs:
msg = frame.model_dump_json()
self.logger.log(level=60, msg=msg) # Log at custom log level > CRITICAL

View File

@@ -41,6 +41,9 @@ class SysLog:
The logger is set to the DEBUG level, and is equipped with a handler that writes to a file and filters out
JSON-like messages.
"""
if not SIM_OUTPUT.save_sys_logs:
return
log_path = self._get_log_path()
file_handler = logging.FileHandler(filename=log_path)
file_handler.setLevel(logging.DEBUG)
@@ -91,7 +94,8 @@ class SysLog:
:param msg: The message to be logged.
"""
self.logger.debug(msg)
if SIM_OUTPUT.save_sys_logs:
self.logger.debug(msg)
def info(self, msg: str):
"""
@@ -99,7 +103,8 @@ class SysLog:
:param msg: The message to be logged.
"""
self.logger.info(msg)
if SIM_OUTPUT.save_sys_logs:
self.logger.info(msg)
def warning(self, msg: str):
"""
@@ -107,7 +112,8 @@ class SysLog:
:param msg: The message to be logged.
"""
self.logger.warning(msg)
if SIM_OUTPUT.save_sys_logs:
self.logger.warning(msg)
def error(self, msg: str):
"""
@@ -115,7 +121,8 @@ class SysLog:
:param msg: The message to be logged.
"""
self.logger.error(msg)
if SIM_OUTPUT.save_sys_logs:
self.logger.error(msg)
def critical(self, msg: str):
"""
@@ -123,4 +130,5 @@ class SysLog:
:param msg: The message to be logged.
"""
self.logger.critical(msg)
if SIM_OUTPUT.save_sys_logs:
self.logger.critical(msg)