Merge remote-tracking branch 'origin/dev' into feature/2628-update-benchmarking-script-branched

2024-06-01 00:20:21 +01:00
parent 5f02846879 c705a15d00
commit 472040aa70
35 changed files with 427 additions and 348 deletions
--- a/src/primaite/game/agent/interface.py
+++ b/src/primaite/game/agent/interface.py
@@ -14,7 +14,7 @@ if TYPE_CHECKING:
    pass


-class AgentActionHistoryItem(BaseModel):
+class AgentHistoryItem(BaseModel):
    """One entry of an agent's action log - what the agent did and how the simulator responded in 1 step."""

    timestep: int
@@ -32,6 +32,8 @@ class AgentActionHistoryItem(BaseModel):
    response: RequestResponse
    """The response sent back by the simulator for this action."""

+    reward: Optional[float] = None
+

 class AgentStartSettings(BaseModel):
    """Configuration values for when an agent starts performing actions."""
@@ -110,7 +112,7 @@ class AbstractAgent(ABC):
        self.observation_manager: Optional[ObservationManager] = observation_space
        self.reward_function: Optional[RewardFunction] = reward_function
        self.agent_settings = agent_settings or AgentSettings()
-        self.action_history: List[AgentActionHistoryItem] = []
+        self.history: List[AgentHistoryItem] = []

    def update_observation(self, state: Dict) -> ObsType:
        """
@@ -130,7 +132,7 @@ class AbstractAgent(ABC):
        :return: Reward from the state.
        :rtype: float
        """
-        return self.reward_function.update(state=state, last_action_response=self.action_history[-1])
+        return self.reward_function.update(state=state, last_action_response=self.history[-1])

    @abstractmethod
    def get_action(self, obs: ObsType, timestep: int = 0) -> Tuple[str, Dict]:
@@ -161,12 +163,16 @@ class AbstractAgent(ABC):
        self, timestep: int, action: str, parameters: Dict[str, Any], request: RequestFormat, response: RequestResponse
    ) -> None:
        """Process the response from the most recent action."""
-        self.action_history.append(
-            AgentActionHistoryItem(
+        self.history.append(
+            AgentHistoryItem(
                timestep=timestep, action=action, parameters=parameters, request=request, response=response
            )
        )

+    def save_reward_to_history(self) -> None:
+        """Store the reward function's current reward on the newest history item (history must be non-empty)."""
+        self.history[-1].reward = self.reward_function.current_reward
+

 class AbstractScriptedAgent(AbstractAgent):
    """Base class for actors which generate their own behaviour."""
--- a/src/primaite/game/agent/rewards.py
+++ b/src/primaite/game/agent/rewards.py
@@ -34,7 +34,7 @@ from primaite import getLogger
 from primaite.game.agent.utils import access_from_nested_dict, NOT_PRESENT_IN_STATE

 if TYPE_CHECKING:
-    from primaite.game.agent.interface import AgentActionHistoryItem
+    from primaite.game.agent.interface import AgentHistoryItem

 _LOGGER = getLogger(__name__)
 WhereType = Optional[Iterable[Union[str, int]]]
@@ -44,7 +44,7 @@ class AbstractReward:
    """Base class for reward function components."""

    @abstractmethod
-    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
+    def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
        """Calculate the reward for the current state."""
        return 0.0

@@ -64,7 +64,7 @@ class AbstractReward:
 class DummyReward(AbstractReward):
    """Dummy reward function component which always returns 0."""

-    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
+    def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
        """Calculate the reward for the current state."""
        return 0.0

@@ -104,7 +104,7 @@ class DatabaseFileIntegrity(AbstractReward):
            file_name,
        ]

-    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
+    def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
        """Calculate the reward for the current state.

        :param state: The current state of the simulation.
@@ -159,7 +159,7 @@ class WebServer404Penalty(AbstractReward):
        """
        self.location_in_state = ["network", "nodes", node_hostname, "services", service_name]

-    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
+    def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
        """Calculate the reward for the current state.

        :param state: The current state of the simulation.
@@ -213,7 +213,7 @@ class WebpageUnavailablePenalty(AbstractReward):
        self.location_in_state: List[str] = ["network", "nodes", node_hostname, "applications", "WebBrowser"]
        self._last_request_failed: bool = False

-    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
+    def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
        """
        Calculate the reward based on current simulation state, and the recent agent action.

@@ -273,7 +273,7 @@ class GreenAdminDatabaseUnreachablePenalty(AbstractReward):
        self.location_in_state: List[str] = ["network", "nodes", node_hostname, "applications", "DatabaseClient"]
        self._last_request_failed: bool = False

-    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
+    def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
        """
        Calculate the reward based on current simulation state, and the recent agent action.

@@ -343,7 +343,7 @@ class SharedReward(AbstractReward):
        self.callback: Callable[[str], float] = default_callback
        """Method that retrieves an agent's current reward given the agent's name."""

-    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
+    def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
        """Simply access the other agent's reward and return it."""
        return self.callback(self.agent_name)

@@ -389,7 +389,7 @@ class RewardFunction:
        """
        self.reward_components.append((component, weight))

-    def update(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
+    def update(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
        """Calculate the overall reward for the current state.

        :param state: The current state of the simulation.
--- a/src/primaite/game/game.py
+++ b/src/primaite/game/game.py
@@ -160,6 +160,7 @@ class PrimaiteGame:
            agent = self.agents[agent_name]
            if self.step_counter > 0:  # can't get reward before first action
                agent.update_reward(state=state)
+                agent.save_reward_to_history()
            agent.update_observation(state=state)  # order of this doesn't matter so just use reward order
            agent.reward_function.total_reward += agent.reward_function.current_reward

--- a/src/primaite/notebooks/Data-Manipulation-Customising-Red-Agent.ipynb
+++ b/src/primaite/notebooks/Data-Manipulation-Customising-Red-Agent.ipynb
@@ -22,7 +22,7 @@
    "# Imports\n",
    "\n",
    "from primaite.config.load import data_manipulation_config_path\n",
-    "from primaite.game.agent.interface import AgentActionHistoryItem\n",
+    "from primaite.game.agent.interface import AgentHistoryItem\n",
    "from primaite.session.environment import PrimaiteGymEnv\n",
    "import yaml\n",
    "from pprint import pprint"
@@ -63,7 +63,7 @@
   "source": [
    "def friendly_output_red_action(info):\n",
    "    # parse the info dict form step output and write out what the red agent is doing\n",
-    "    red_info : AgentActionHistoryItem = info['agent_actions']['data_manipulation_attacker']\n",
+    "    red_info : AgentHistoryItem = info['agent_actions']['data_manipulation_attacker']\n",
    "    red_action = red_info.action\n",
    "    if red_action == 'DONOTHING':\n",
    "        red_str = 'DO NOTHING'\n",
--- a/src/primaite/notebooks/Data-Manipulation-E2E-Demonstration.ipynb
+++ b/src/primaite/notebooks/Data-Manipulation-E2E-Demonstration.ipynb
@@ -59,7 +59,7 @@
    "\n",
    "At the start of every episode, the red agent randomly chooses either client 1 or client 2 to login to. It waits a bit then sends a DELETE query to the database from its chosen client. If the delete is successful, the database file is flagged as compromised to signal that data is not available.\n",
    "\n",
-    "[<img src=\"_package_data/uc2_attack.png\" width=\"500\"/>](_package_data/uc2_attack.png)\n",
+    "![uc2_attack](./_package_data/uc2_attack.png)\n",
    "\n",
    "_(click image to enlarge)_"
   ]
@@ -180,15 +180,15 @@
    "| link_id | endpoint_a       | endpoint_b        |\n",
    "|---------|------------------|-------------------|\n",
    "| 1       | router_1         | switch_1          |\n",
-    "| 1       | router_1         | switch_2          |\n",
-    "| 1       | switch_1         | domain_controller |\n",
-    "| 1       | switch_1         | web_server        |\n",
-    "| 1       | switch_1         | database_server   |\n",
-    "| 1       | switch_1         | backup_server     |\n",
-    "| 1       | switch_1         | security_suite    |\n",
-    "| 1       | switch_2         | client_1          |\n",
-    "| 1       | switch_2         | client_2          |\n",
-    "| 1       | switch_2         | security_suite    |\n",
+    "| 2       | router_1         | switch_2          |\n",
+    "| 3       | switch_1         | domain_controller |\n",
+    "| 4       | switch_1         | web_server        |\n",
+    "| 5       | switch_1         | database_server   |\n",
+    "| 6       | switch_1         | backup_server     |\n",
+    "| 7       | switch_1         | security_suite    |\n",
+    "| 8       | switch_2         | client_1          |\n",
+    "| 9       | switch_2         | client_2          |\n",
+    "| 10      | switch_2         | security_suite    |\n",
    "\n",
    "\n",
    "The ACL rules in the observation space appear in the same order that they do in the actual ACL. Though, only the first 10 rules are shown, there are default rules lower down that cannot be changed by the agent. The extra rules just allow the network to function normally, by allowing pings, ARP traffic, etc.\n",
@@ -392,7 +392,7 @@
    "# Imports\n",
    "from primaite.config.load import data_manipulation_config_path\n",
    "from primaite.session.environment import PrimaiteGymEnv\n",
-    "from primaite.game.agent.interface import AgentActionHistoryItem\n",
+    "from primaite.game.agent.interface import AgentHistoryItem\n",
    "import yaml\n",
    "from pprint import pprint\n"
   ]
@@ -401,7 +401,8 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Instantiate the environment. We also disable the agent observation flattening.\n",
+    "Instantiate the environment. \n",
+    "We will also disable the agent observation flattening.\n",
    "\n",
    "This cell will print the observation when the network is healthy. You should be able to verify Node file and service statuses against the description above."
   ]
@@ -444,7 +445,7 @@
   "source": [
    "def friendly_output_red_action(info):\n",
    "    # parse the info dict form step output and write out what the red agent is doing\n",
-    "    red_info : AgentActionHistoryItem = info['agent_actions']['data_manipulation_attacker']\n",
+    "    red_info : AgentHistoryItem = info['agent_actions']['data_manipulation_attacker']\n",
    "    red_action = red_info.action\n",
    "    if red_action == 'DONOTHING':\n",
    "        red_str = 'DO NOTHING'\n",
@@ -705,7 +706,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.10.8"
  }
 },
 "nbformat": 4,
--- a/src/primaite/notebooks/Training-an-RLLIB-MARL-System.ipynb
+++ b/src/primaite/notebooks/Training-an-RLLIB-MARL-System.ipynb
@@ -25,13 +25,13 @@
    "from primaite.game.game import PrimaiteGame\n",
    "import yaml\n",
    "\n",
-    "from primaite.session.environment import PrimaiteRayEnv\n",
+    "from primaite.session.ray_envs import PrimaiteRayEnv\n",
    "from primaite import PRIMAITE_PATHS\n",
    "\n",
    "import ray\n",
    "from ray import air, tune\n",
    "from ray.rllib.algorithms.ppo import PPOConfig\n",
-    "from primaite.session.environment import PrimaiteRayMARLEnv\n",
+    "from primaite.session.ray_envs import PrimaiteRayMARLEnv\n",
    "\n",
    "# If you get an error saying this config file doesn't exist, you may need to run `primaite setup` in your command line\n",
    "# to copy the files to your user data path.\n",
@@ -60,8 +60,8 @@
    "        policies={'defender_1','defender_2'}, # These names are the same as the agents defined in the example config.\n",
    "        policy_mapping_fn=lambda agent_id, episode, worker, **kw: agent_id,\n",
    "        )\n",
-    "    .environment(env=PrimaiteRayMARLEnv, env_config=cfg)#, disable_env_checking=True)\n",
-    "    .rollouts(num_rollout_workers=0)\n",
+    "    .environment(env=PrimaiteRayMARLEnv, env_config=cfg)\n",
+    "    .env_runners(num_env_runners=0)\n",
    "    .training(train_batch_size=128)\n",
    "    )\n"
   ]
--- a/src/primaite/notebooks/Training-an-RLLib-Agent.ipynb
+++ b/src/primaite/notebooks/Training-an-RLLib-Agent.ipynb
@@ -18,8 +18,7 @@
    "import yaml\n",
    "from primaite.config.load import data_manipulation_config_path\n",
    "\n",
-    "from primaite.session.environment import PrimaiteRayEnv\n",
-    "from ray.rllib.algorithms import ppo\n",
+    "from primaite.session.ray_envs import PrimaiteRayEnv\n",
    "from ray import air, tune\n",
    "import ray\n",
    "from ray.rllib.algorithms.ppo import PPOConfig\n",
@@ -52,8 +51,8 @@
    "\n",
    "config = (\n",
    "    PPOConfig()\n",
-    "    .environment(env=PrimaiteRayEnv, env_config=env_config, disable_env_checking=True)\n",
-    "    .rollouts(num_rollout_workers=0)\n",
+    "    .environment(env=PrimaiteRayEnv, env_config=env_config)\n",
+    "    .env_runners(num_env_runners=0)\n",
    "    .training(train_batch_size=128)\n",
    ")\n"
   ]
@@ -74,7 +73,7 @@
    "tune.Tuner(\n",
    "    \"PPO\",\n",
    "    run_config=air.RunConfig(\n",
-    "        stop={\"timesteps_total\": 5 * 128}\n",
+    "        stop={\"timesteps_total\": 512}\n",
    "    ),\n",
    "    param_space=config\n",
    ").fit()\n"
@@ -97,7 +96,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.10.12"
  }
 },
 "nbformat": 4,
--- a/src/primaite/notebooks/Training-an-SB3-Agent.ipynb
+++ b/src/primaite/notebooks/Training-an-SB3-Agent.ipynb
@@ -43,7 +43,10 @@
   "outputs": [],
   "source": [
    "with open(data_manipulation_config_path(), 'r') as f:\n",
-    "    cfg = yaml.safe_load(f)"
+    "    cfg = yaml.safe_load(f)\n",
+    "for agent in cfg['agents']:\n",
+    "    if agent['ref'] == 'defender':\n",
+    "        agent['agent_settings']['flatten_obs']=True"
   ]
  },
  {
@@ -177,7 +180,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
+   "version": "3.10.12"
  }
 },
 "nbformat": 4,
--- a/src/primaite/notebooks/Using-Episode-Schedules.ipynb
+++ b/src/primaite/notebooks/Using-Episode-Schedules.ipynb
@@ -13,50 +13,6 @@
    "directory with several config files that work together."
   ]
  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Defining variations in the config file.\n",
-    "\n",
-    "### Base scenario\n",
-    "The base scenario is essentially the same as a fixed YAML configuration, but it can contain placeholders that are \n",
-    "populated with episode-specific data at runtime. The base scenario contains any network, agent, or settings that\n",
-    "remain fixed for the entire training/evaluation session.\n",
-    "\n",
-    "The placeholders are defined as YAML Aliases and they are denoted by an asterisk (`*placeholder`).\n",
-    "\n",
-    "### Variations\n",
-    "For each variation that could be used in a placeholder, there is a separate yaml file that contains the data that should populate the placeholder.\n",
-    "\n",
-    "The data that fills the placeholder is defined as a YAML Anchor in a separate file, denoted by an ampersand (`&anchor`).\n",
-    "\n",
-    "[Learn more about YAML Aliases and Anchors here.](https://www.educative.io/blog/advanced-yaml-syntax-cheatsheet#:~:text=YAML%20Anchors%20and%20Alias)\n",
-    "\n",
-    "### Schedule\n",
-    "Users must define which combination of scenario variations should be loaded in each episode. This takes the form of a\n",
-    "YAML file with a relative path to the base scenario and a list of paths to be loaded in during each episode.\n",
-    "\n",
-    "It takes the following format:\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "```yaml\n",
-    "base_scenario: base.yaml\n",
-    "schedule:\n",
-    "  0: # list of variations to load in at episode 0 (before the first call to env.reset() happens)\n",
-    "    - laydown_1.yaml\n",
-    "    - attack_1.yaml\n",
-    "  1: # list of variations to load in at episode 1 (after the first env.reset() call)\n",
-    "    - laydown_2.yaml\n",
-    "    - attack_2.yaml\n",
-    "```\n"
-   ]
-  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -298,8 +254,8 @@
    "table = PrettyTable()\n",
    "table.field_names = [\"step\", \"Green Action\", \"Red Action\"]\n",
    "for i in range(21):\n",
-    "    green_action = env.game.agents['green_A'].action_history[i].action\n",
-    "    red_action = env.game.agents['red_A'].action_history[i].action\n",
+    "    green_action = env.game.agents['green_A'].history[i].action\n",
+    "    red_action = env.game.agents['red_A'].history[i].action\n",
    "    table.add_row([i, green_action, red_action])\n",
    "print(table)"
   ]
@@ -329,8 +285,8 @@
    "table = PrettyTable()\n",
    "table.field_names = [\"step\", \"Green Action\", \"Red Action\"]\n",
    "for i in range(21):\n",
-    "    green_action = env.game.agents['green_B'].action_history[i].action\n",
-    "    red_action = env.game.agents['red_B'].action_history[i].action\n",
+    "    green_action = env.game.agents['green_B'].history[i].action\n",
+    "    red_action = env.game.agents['red_B'].history[i].action\n",
    "    table.add_row([i, green_action, red_action])\n",
    "print(table)"
   ]
--- a/src/primaite/notebooks/multi-processing.ipynb
+++ b/src/primaite/notebooks/multi-processing.ipynb
@@ -4,8 +4,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Simple multi-processing demo using SubprocVecEnv from SB3\n",
-    "Based on a code example provided by Rachael Proctor."
+    "## Simple multi-processing demo using SubprocVecEnv from SB3"
   ]
  },
  {
--- a/src/primaite/session/environment.py
+++ b/src/primaite/session/environment.py
@@ -4,7 +4,6 @@ from typing import Any, Dict, Optional, SupportsFloat, Tuple, Union

 import gymnasium
 from gymnasium.core import ActType, ObsType
-from ray.rllib.env.multi_agent_env import MultiAgentEnv

 from primaite import getLogger
 from primaite.game.agent.interface import ProxyAgent
@@ -12,6 +11,7 @@ from primaite.game.game import PrimaiteGame
 from primaite.session.episode_schedule import build_scheduler, EpisodeScheduler
 from primaite.session.io import PrimaiteIO
 from primaite.simulator import SIM_OUTPUT
+from primaite.simulator.system.core.packet_capture import PacketCapture

 _LOGGER = getLogger(__name__)

@@ -63,7 +63,7 @@ class PrimaiteGymEnv(gymnasium.Env):
        terminated = False
        truncated = self.game.calculate_truncated()
        info = {
-            "agent_actions": {name: agent.action_history[-1] for name, agent in self.game.agents.items()}
+            "agent_actions": {name: agent.history[-1] for name, agent in self.game.agents.items()}
        }  # tell us what all the agents did for convenience.
        if self.game.save_step_metadata:
            self._write_step_metadata_json(step, action, state, reward)
@@ -94,9 +94,10 @@ class PrimaiteGymEnv(gymnasium.Env):
        self.average_reward_per_episode[self.episode_counter] = self.agent.reward_function.total_reward

        if self.io.settings.save_agent_actions:
-            all_agent_actions = {name: agent.action_history for name, agent in self.game.agents.items()}
-            self.io.write_agent_actions(agent_actions=all_agent_actions, episode=self.episode_counter)
+            all_agent_actions = {name: agent.history for name, agent in self.game.agents.items()}
+            self.io.write_agent_log(agent_actions=all_agent_actions, episode=self.episode_counter)
        self.episode_counter += 1
+        PacketCapture.clear()
        self.game: PrimaiteGame = PrimaiteGame.from_config(cfg=self.episode_scheduler(self.episode_counter))
        self.game.setup_for_episode(episode=self.episode_counter)
        state = self.game.get_sim_state()
@@ -130,166 +131,5 @@ class PrimaiteGymEnv(gymnasium.Env):
    def close(self):
        """Close the simulation."""
        if self.io.settings.save_agent_actions:
-            all_agent_actions = {name: agent.action_history for name, agent in self.game.agents.items()}
-            self.io.write_agent_actions(agent_actions=all_agent_actions, episode=self.episode_counter)
-
-
-class PrimaiteRayEnv(gymnasium.Env):
-    """Ray wrapper that accepts a single `env_config` parameter in init function for compatibility with Ray."""
-
-    def __init__(self, env_config: Dict) -> None:
-        """Initialise the environment.
-
-        :param env_config: A dictionary containing the environment configuration.
-        :type env_config: Dict
-        """
-        self.env = PrimaiteGymEnv(env_config=env_config)
-        # self.env.episode_counter -= 1
-        self.action_space = self.env.action_space
-        self.observation_space = self.env.observation_space
-
-    def reset(self, *, seed: int = None, options: dict = None) -> Tuple[ObsType, Dict]:
-        """Reset the environment."""
-        return self.env.reset(seed=seed)
-
-    def step(self, action: ActType) -> Tuple[ObsType, SupportsFloat, bool, bool, Dict]:
-        """Perform a step in the environment."""
-        return self.env.step(action)
-
-    def close(self):
-        """Close the simulation."""
-        self.env.close()
-
-    @property
-    def game(self) -> PrimaiteGame:
-        """Pass through game from env."""
-        return self.env.game
-
-
-class PrimaiteRayMARLEnv(MultiAgentEnv):
-    """Ray Environment that inherits from MultiAgentEnv to allow training MARL systems."""
-
-    def __init__(self, env_config: Dict) -> None:
-        """Initialise the environment.
-
-        :param env_config: A dictionary containing the environment configuration. It must contain a single key, `game`
-            which is the PrimaiteGame instance.
-        :type env_config: Dict
-        """
-        self.episode_counter: int = 0
-        """Current episode number."""
-        self.episode_scheduler: EpisodeScheduler = build_scheduler(env_config)
-        """Object that returns a config corresponding to the current episode."""
-        self.io = PrimaiteIO.from_config(self.episode_scheduler(0).get("io_settings", {}))
-        """Handles IO for the environment. This produces sys logs, agent logs, etc."""
-        self.game: PrimaiteGame = PrimaiteGame.from_config(self.episode_scheduler(self.episode_counter))
-        """Reference to the primaite game"""
-        self._agent_ids = list(self.game.rl_agents.keys())
-        """Agent ids. This is a list of strings of agent names."""
-
-        self.terminateds = set()
-        self.truncateds = set()
-        self.observation_space = gymnasium.spaces.Dict(
-            {
-                name: gymnasium.spaces.flatten_space(agent.observation_manager.space)
-                for name, agent in self.agents.items()
-            }
-        )
-        self.action_space = gymnasium.spaces.Dict(
-            {name: agent.action_manager.space for name, agent in self.agents.items()}
-        )
-
-        super().__init__()
-
-    @property
-    def agents(self) -> Dict[str, ProxyAgent]:
-        """Grab a fresh reference to the agents from this episode's game object."""
-        return {name: self.game.rl_agents[name] for name in self._agent_ids}
-
-    def reset(self, *, seed: int = None, options: dict = None) -> Tuple[ObsType, Dict]:
-        """Reset the environment."""
-        rewards = {name: agent.reward_function.total_reward for name, agent in self.agents.items()}
-        _LOGGER.info(f"Resetting environment, episode {self.episode_counter}, " f"avg. reward: {rewards}")
-
-        if self.io.settings.save_agent_actions:
-            all_agent_actions = {name: agent.action_history for name, agent in self.game.agents.items()}
-            self.io.write_agent_actions(agent_actions=all_agent_actions, episode=self.episode_counter)
-
-        self.episode_counter += 1
-        self.game: PrimaiteGame = PrimaiteGame.from_config(self.episode_scheduler(self.episode_counter))
-        self.game.setup_for_episode(episode=self.episode_counter)
-        state = self.game.get_sim_state()
-        self.game.update_agents(state)
-        next_obs = self._get_obs()
-        info = {}
-        return next_obs, info
-
-    def step(
-        self, actions: Dict[str, ActType]
-    ) -> Tuple[Dict[str, ObsType], Dict[str, SupportsFloat], Dict[str, bool], Dict[str, bool], Dict]:
-        """Perform a step in the environment. Adherent to Ray MultiAgentEnv step API.
-
-        :param actions: Dict of actions. The key is agent identifier and the value is a gymnasium action instance.
-        :type actions: Dict[str, ActType]
-        :return: Observations, rewards, terminateds, truncateds, and info. Each one is a dictionary keyed by agent
-            identifier.
-        :rtype: Tuple[Dict[str,ObsType], Dict[str, SupportsFloat], Dict[str,bool], Dict[str,bool], Dict]
-        """
-        step = self.game.step_counter
-        # 1. Perform actions
-        for agent_name, action in actions.items():
-            self.agents[agent_name].store_action(action)
-        self.game.pre_timestep()
-        self.game.apply_agent_actions()
-
-        # 2. Advance timestep
-        self.game.advance_timestep()
-
-        # 3. Get next observations
-        state = self.game.get_sim_state()
-        self.game.update_agents(state)
-        next_obs = self._get_obs()
-
-        # 4. Get rewards
-        rewards = {name: agent.reward_function.current_reward for name, agent in self.agents.items()}
-        _LOGGER.info(f"step: {self.game.step_counter}, Rewards: {rewards}")
-        terminateds = {name: False for name, _ in self.agents.items()}
-        truncateds = {name: self.game.calculate_truncated() for name, _ in self.agents.items()}
-        infos = {name: {} for name, _ in self.agents.items()}
-        terminateds["__all__"] = len(self.terminateds) == len(self.agents)
-        truncateds["__all__"] = self.game.calculate_truncated()
-        if self.game.save_step_metadata:
-            self._write_step_metadata_json(step, actions, state, rewards)
-        return next_obs, rewards, terminateds, truncateds, infos
-
-    def _write_step_metadata_json(self, step: int, actions: Dict, state: Dict, rewards: Dict):
-        output_dir = SIM_OUTPUT.path / f"episode_{self.episode_counter}" / "step_metadata"
-
-        output_dir.mkdir(parents=True, exist_ok=True)
-        path = output_dir / f"step_{step}.json"
-
-        data = {
-            "episode": self.episode_counter,
-            "step": step,
-            "actions": {agent_name: int(action) for agent_name, action in actions.items()},
-            "reward": rewards,
-            "state": state,
-        }
-        with open(path, "w") as file:
-            json.dump(data, file)
-
-    def _get_obs(self) -> Dict[str, ObsType]:
-        """Return the current observation."""
-        obs = {}
-        for agent_name in self._agent_ids:
-            agent = self.game.rl_agents[agent_name]
-            unflat_space = agent.observation_manager.space
-            unflat_obs = agent.observation_manager.current_observation
-            obs[agent_name] = gymnasium.spaces.flatten(unflat_space, unflat_obs)
-        return obs
-
-    def close(self):
-        """Close the simulation."""
-        if self.io.settings.save_agent_actions:
-            all_agent_actions = {name: agent.action_history for name, agent in self.game.agents.items()}
-            self.io.write_agent_actions(agent_actions=all_agent_actions, episode=self.episode_counter)
+            all_agent_actions = {name: agent.history for name, agent in self.game.agents.items()}
+            self.io.write_agent_log(agent_actions=all_agent_actions, episode=self.episode_counter)
--- a/src/primaite/session/io.py
+++ b/src/primaite/session/io.py
@@ -87,7 +87,7 @@ class PrimaiteIO:
        """Return the path where agent actions will be saved."""
        return self.session_path / "agent_actions" / f"episode_{episode}.json"

-    def write_agent_actions(self, agent_actions: Dict[str, List], episode: int) -> None:
+    def write_agent_log(self, agent_actions: Dict[str, List], episode: int) -> None:
        """Take the contents of the agent action log and write it to a file.

        :param episode: Episode number
--- a/src/primaite/session/ray_envs.py
+++ b/src/primaite/session/ray_envs.py
@@ -0,0 +1,177 @@
+import json
+from typing import Dict, SupportsFloat, Tuple
+
+import gymnasium
+from gymnasium.core import ActType, ObsType
+from ray.rllib.env.multi_agent_env import MultiAgentEnv
+
+from primaite.game.agent.interface import ProxyAgent
+from primaite.game.game import PrimaiteGame
+from primaite.session.environment import _LOGGER, PrimaiteGymEnv
+from primaite.session.episode_schedule import build_scheduler, EpisodeScheduler
+from primaite.session.io import PrimaiteIO
+from primaite.simulator import SIM_OUTPUT
+from primaite.simulator.system.core.packet_capture import PacketCapture
+
+
+class PrimaiteRayMARLEnv(MultiAgentEnv):
+    """Ray Environment that inherits from MultiAgentEnv to allow training MARL systems."""
+
+    def __init__(self, env_config: Dict) -> None:
+        """Initialise the environment.
+
+        :param env_config: Environment configuration. It is passed to `build_scheduler`; the resulting scheduler
+            supplies the config each episode's `PrimaiteGame` is built from (episode 0 also supplies `io_settings`).
+        :type env_config: Dict
+        """
+        self.episode_counter: int = 0
+        """Current episode number."""
+        self.episode_scheduler: EpisodeScheduler = build_scheduler(env_config)
+        """Object that returns a config corresponding to the current episode."""
+        self.io = PrimaiteIO.from_config(self.episode_scheduler(0).get("io_settings", {}))
+        """Handles IO for the environment. This produces sys logs, agent logs, etc."""
+        self.game: PrimaiteGame = PrimaiteGame.from_config(self.episode_scheduler(self.episode_counter))
+        """Reference to the primaite game."""
+        self._agent_ids = list(self.game.rl_agents.keys())
+        """Agent ids. This is a list of strings of agent names."""
+
+        self.terminateds = set()
+        self.truncateds = set()
+        agents = self.agents  # the property rebuilds its dict on every access, so read it once
+        self.observation_space = gymnasium.spaces.Dict(
+            {name: gymnasium.spaces.flatten_space(agent.observation_manager.space) for name, agent in agents.items()}
+        )
+        self.action_space = gymnasium.spaces.Dict({name: agent.action_manager.space for name, agent in agents.items()})
+        self._obs_space_in_preferred_format = True
+        self._action_space_in_preferred_format = True
+        super().__init__()
+
+    @property
+    def agents(self) -> Dict[str, ProxyAgent]:
+        """Grab a fresh reference to the agents from this episode's game object."""
+        return {name: self.game.rl_agents[name] for name in self._agent_ids}
+
+    def _save_agent_log(self) -> None:
+        """Write every agent's history for the current episode to disk, if enabled in the IO settings."""
+        if self.io.settings.save_agent_actions:
+            all_agent_actions = {name: agent.history for name, agent in self.game.agents.items()}
+            self.io.write_agent_log(agent_actions=all_agent_actions, episode=self.episode_counter)
+
+    def reset(self, *, seed: int = None, options: dict = None) -> Tuple[ObsType, Dict]:
+        """Save the finished episode's agent log, then build and set up a fresh game for the next episode.
+
+        :param seed: Accepted for API compatibility; not used by this method.
+        :param options: Accepted for API compatibility; not used by this method.
+        :return: Observations keyed by agent name, and an empty info dict.
+        """
+        rewards = {name: agent.reward_function.total_reward for name, agent in self.agents.items()}
+        _LOGGER.info(f"Resetting environment, episode {self.episode_counter}, avg. reward: {rewards}")
+        self._save_agent_log()
+
+        self.episode_counter += 1
+        PacketCapture.clear()
+        self.game: PrimaiteGame = PrimaiteGame.from_config(self.episode_scheduler(self.episode_counter))
+        self.game.setup_for_episode(episode=self.episode_counter)
+        state = self.game.get_sim_state()
+        self.game.update_agents(state)
+        return self._get_obs(), {}
+
+    def step(
+        self, actions: Dict[str, ActType]
+    ) -> Tuple[Dict[str, ObsType], Dict[str, SupportsFloat], Dict[str, bool], Dict[str, bool], Dict]:
+        """Perform a step in the environment. Adherent to Ray MultiAgentEnv step API.
+
+        :param actions: Dict of actions. The key is agent identifier and the value is a gymnasium action instance.
+        :type actions: Dict[str, ActType]
+        :return: Observations, rewards, terminateds, truncateds, and info. Each one is a dictionary keyed by agent
+            identifier.
+        :rtype: Tuple[Dict[str,ObsType], Dict[str, SupportsFloat], Dict[str,bool], Dict[str,bool], Dict]
+        """
+        step = self.game.step_counter  # read before advance_timestep() runs; labels the step metadata file
+        agents = self.agents  # build the name -> agent mapping once instead of on every property access
+        # 1. Perform actions
+        for agent_name, action in actions.items():
+            agents[agent_name].store_action(action)
+        self.game.pre_timestep()
+        self.game.apply_agent_actions()
+
+        # 2. Advance timestep
+        self.game.advance_timestep()
+
+        # 3. Get next observations
+        state = self.game.get_sim_state()
+        self.game.update_agents(state)
+        next_obs = self._get_obs()
+
+        # 4. Get rewards
+        rewards = {name: agent.reward_function.current_reward for name, agent in agents.items()}
+        _LOGGER.info(f"step: {self.game.step_counter}, Rewards: {rewards}")
+        # 5. Episode-end flags: the truncation check is the same for every agent, so evaluate it once
+        truncated = self.game.calculate_truncated()
+        terminateds = dict.fromkeys(agents, False)
+        truncateds = dict.fromkeys(agents, truncated)
+        infos = {name: {} for name in agents}
+        terminateds["__all__"] = len(self.terminateds) == len(agents)
+        truncateds["__all__"] = truncated
+        if self.game.save_step_metadata:
+            self._write_step_metadata_json(step, actions, state, rewards)
+        return next_obs, rewards, terminateds, truncateds, infos
+
+    def _write_step_metadata_json(self, step: int, actions: Dict, state: Dict, rewards: Dict) -> None:
+        """Write the step's actions, rewards and state to `step_<step>.json` in the episode's `step_metadata` dir."""
+        output_dir = SIM_OUTPUT.path / f"episode_{self.episode_counter}" / "step_metadata"
+        output_dir.mkdir(parents=True, exist_ok=True)
+        path = output_dir / f"step_{step}.json"
+        data = {
+            "episode": self.episode_counter,
+            "step": step,
+            "actions": {agent_name: int(action) for agent_name, action in actions.items()},
+            "reward": rewards,
+            "state": state,
+        }
+        with open(path, "w", encoding="utf-8") as file:
+            json.dump(data, file)
+
+    def _get_obs(self) -> Dict[str, ObsType]:
+        """Return the current observation of every RL agent, flattened and keyed by agent name."""
+        obs = {}
+        for name, agent in self.agents.items():
+            manager = agent.observation_manager
+            obs[name] = gymnasium.spaces.flatten(manager.space, manager.current_observation)
+        return obs
+
+    def close(self):
+        """Close the simulation."""
+        self._save_agent_log()
+
+
+class PrimaiteRayEnv(gymnasium.Env):
+    """Thin wrapper around `PrimaiteGymEnv` whose constructor takes only `env_config`, as Ray requires."""
+
+    def __init__(self, env_config: Dict) -> None:
+        """Build the wrapped environment and expose its action and observation spaces on this object.
+
+        :param env_config: Environment configuration, forwarded unchanged to `PrimaiteGymEnv`.
+        :type env_config: Dict
+        """
+        wrapped = PrimaiteGymEnv(env_config=env_config)
+        self.env = wrapped
+        self.action_space = wrapped.action_space
+        self.observation_space = wrapped.observation_space
+
+    def reset(self, *, seed: int = None, options: dict = None) -> Tuple[ObsType, Dict]:
+        """Reset the wrapped environment; `seed` is forwarded, `options` is accepted but not passed on."""
+        return self.env.reset(seed=seed)
+
+    def step(self, action: ActType) -> Tuple[ObsType, SupportsFloat, bool, bool, Dict]:
+        """Apply `action` to the wrapped environment and return its step result unchanged."""
+        return self.env.step(action)
+
+    def close(self):
+        """Close the wrapped environment."""
+        self.env.close()
+
+    @property
+    def game(self) -> PrimaiteGame:
+        """The `game` object currently held by the wrapped environment."""
+        return self.env.game
--- a/src/primaite/simulator/system/core/packet_capture.py
+++ b/src/primaite/simulator/system/core/packet_capture.py
@@ -21,6 +21,8 @@ class PacketCapture:
    The PCAPs are logged to: <simulation output directory>/<hostname>/<hostname>_<ip address>_pcap.log
    """

+    _logger_instances: List[logging.Logger] = []
+
    def __init__(
        self,
        hostname: str,
@@ -65,10 +67,12 @@ class PacketCapture:

        if outbound:
            self.outbound_logger = logging.getLogger(self._get_logger_name(outbound))
+            PacketCapture._logger_instances.append(self.outbound_logger)
            logger = self.outbound_logger
        else:
            self.inbound_logger = logging.getLogger(self._get_logger_name(outbound))
            logger = self.inbound_logger
+            PacketCapture._logger_instances.append(self.inbound_logger)

        logger.setLevel(60)  # Custom log level > CRITICAL to prevent any unwanted standard DEBUG-CRITICAL logs
        logger.addHandler(file_handler)
@@ -122,3 +126,13 @@ class PacketCapture:
        if SIM_OUTPUT.save_pcap_logs:
            msg = frame.model_dump_json()
            self.outbound_logger.log(level=60, msg=msg)  # Log at custom log level > CRITICAL
+
+    @staticmethod
+    def clear():
+        """Detach and close every handler on each logger registered by PacketCapture, then empty the registry."""
+        for pcap_logger in PacketCapture._logger_instances:
+            # Walk a snapshot: removeHandler() mutates pcap_logger.handlers while we loop.
+            for open_handler in list(pcap_logger.handlers):
+                pcap_logger.removeHandler(open_handler)
+                open_handler.close()
+        PacketCapture._logger_instances = []