Merged PR 142: v2.0.0

v2.0.0 Related work items: #901, #1523, #1574, #1594, #1595, #1597, #1623, #1626, #1629, #1631, #1632, #1635, #1637, #1638, #1639, #1640, #1641, #1647, #1648, #1650
2023-07-26 18:20:28 +00:00
parent 3e1ff0db17 46b3ab0959
commit db9aa3cdf0
144 changed files with 20060 additions and 3614 deletions
--- a/.azure/artifact-release-pipeline.yaml
+++ b/.azure/artifact-release-pipeline.yaml
@@ -1,38 +1,12 @@
 trigger:
 - main

+pool:
+  vmImage: ubuntu-latest
 strategy:
  matrix:
-    Ubuntu2004Python38:
-      python.version: '3.8'
-      imageName: 'ubuntu-20.04'
-    Ubuntu2004Python39:
-      python.version: '3.9'
-      imageName: 'ubuntu-20.04'
-    Ubuntu2004Python310:
+    Python310:
      python.version: '3.10'
-      imageName: 'ubuntu-20.04'
-    WindowsPython38:
-      python.version: '3.8'
-      imageName: 'windows-latest'
-    WindowsPython39:
-      python.version: '3.9'
-      imageName: 'windows-latest'
-    WindowsPython310:
-      python.version: '3.10'
-      imageName: 'windows-latest'
-    MacPython38:
-      python.version: '3.8'
-      imageName: 'macOS-latest'
-    MacPython39:
-      python.version: '3.9'
-      imageName: 'macOS-latest'
-    MacPython310:
-      python.version: '3.10'
-      imageName: 'macOS-latest'
-
-pool:
-  vmImage: $(imageName)

 steps:
 - task: UsePythonVersion@0
--- a/.azure/azure-build-deploy-docs-pipeline.yml
+++ b/.azure/azure-build-deploy-docs-pipeline.yml
@@ -29,6 +29,10 @@ jobs:
      pip install -e .[dev]
    displayName: 'Install Yawning-Titan for docs autosummary'

+  - script: |
+      primaite setup
+    displayName: 'Perform PrimAITE Setup'
+
  - script: |
      cd docs
      make html
--- a/.azure/azure-ci-build-pipeline.yaml
+++ b/.azure/azure-ci-build-pipeline.yaml
@@ -6,47 +6,85 @@ trigger:
 - bugfix/*
 - release/*

-pool:
-  vmImage: ubuntu-latest
-strategy:
-  matrix:
-    Python38:
-      python.version: '3.8'
-    Python39:
-      python.version: '3.9'
-    Python310:
-      python.version: '3.10'
-    Python311:
-      python.version: '3.11'
+parameters:
+  # https://stackoverflow.com/a/70046417
+  - name: matrix
+    type: object
+    default:
+    - job_name: 'UbuntuPython38'
+      py: '3.8'
+      img: 'ubuntu-latest'
+      every_time: false
+    - job_name: 'UbuntuPython310'
+      py: '3.10'
+      img: 'ubuntu-latest'
+      every_time: true
+    - job_name: 'WindowsPython38'
+      py: '3.8'
+      img: 'windows-latest'
+      every_time: false
+    - job_name: 'WindowsPython310'
+      py: '3.10'
+      img: 'windows-latest'
+      every_time: false
+    - job_name: 'MacOSPython38'
+      py: '3.8'
+      img: 'macOS-latest'
+      every_time: false
+    - job_name: 'MacOSPython310'
+      py: '3.10'
+      img: 'macOS-latest'
+      every_time: false

-steps:
- task: UsePythonVersion@0
-  inputs:
-    versionSpec: '$(python.version)'
-  displayName: 'Use Python $(python.version)'
+stages:
+  - stage: Test
+    jobs:
+    - ${{ each item in parameters.matrix }}:
+      - job: ${{ item.job_name }}
+        pool:
+          vmImage: ${{ item.img }}

- script: |
-    python -m pip install --upgrade pip==23.0.1
-    pip install wheel==0.38.4 --upgrade
-    pip install setuptools==66 --upgrade
-    pip install build==0.10.0
-    pip install pytest-azurepipelines
-  displayName: 'Install build dependencies'
+        condition: or( eq(variables['Build.Reason'], 'PullRequest'), ${{ item.every_time }} )

- script: |
-    python -m build
-  displayName: 'Build PrimAITE'
+        steps:
+          - task: UsePythonVersion@0
+            inputs:
+              versionSpec: ${{ item.py }}
+            displayName: 'Use Python ${{ item.py }}'

- script: |
-    PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl)
-    python -m pip install $PRIMAITE_WHEEL[dev]
-  displayName: 'Install PrimAITE'
+          - script: |
+              python -m pip install pre-commit
+              pre-commit install
+              pre-commit run --all-files
+            displayName: 'Run pre-commits'

-#- script: |
-#    flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-#    flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-#  displayName: 'Lint with flake8'
+          - script: |
+              python -m pip install --upgrade pip==23.0.1
+              pip install wheel==0.38.4 --upgrade
+              pip install setuptools==66 --upgrade
+              pip install build==0.10.0
+              pip install pytest-azurepipelines
+            displayName: 'Install build dependencies'

- script: |
-    pytest tests/
-  displayName: 'Run unmarked tests'
+          - script: |
+              python -m build
+            displayName: 'Build PrimAITE'
+
+          - script: |
+              PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl)
+              python -m pip install $PRIMAITE_WHEEL[dev]
+            displayName: 'Install PrimAITE'
+            condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' ))
+
+          - script: |
+              forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]"
+            displayName: 'Install PrimAITE'
+            condition: eq( variables['Agent.OS'], 'Windows_NT' )
+
+          - script: |
+              primaite setup
+            displayName: 'Perform PrimAITE Setup'
+
+          - script: |
+              pytest -n 4
+            displayName: 'Run tests'
--- a/.azuredevops/pull_request_template.md
+++ b/.azuredevops/pull_request_template.md
@@ -0,0 +1,12 @@
+## Summary
+*Replace this text with an explanation of what the changes are and how you implemented them. Can this impact any other parts of the codebase that we should keep in mind?*
+
+## Test process
+*How have you tested this (if applicable)?*
+
+## Checklist
+- [ ] This PR is linked to a **work item**
+- [ ] I have performed **self-review** of the code
+- [ ] I have written **tests** for any new functionality added with this PR
+- [ ] I have updated the **documentation** if this PR changes or adds functionality
+- [ ] I have run **pre-commit** checks for code style
--- a/.flake8
+++ b/.flake8
@@ -8,5 +8,6 @@ extend-ignore =
    E203
    E712
    D401
+    F811
 exclude =
    docs/source/*
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,3 @@
-# PrimAITE Package
-src/primaite/outputs
-src/primaite/outputs/*
-src/primaite/logs
-src/primaite/logs/*
-TestResults
-
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -57,6 +50,9 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 cover/
+tests/assets/**/*.png
+tests/assets/**/tensorboard_logs/
+tests/assets/**/checkpoints/

 # Translations
 *.mo
@@ -77,6 +73,7 @@ instance/

 # Sphinx documentation
 docs/_build/
+docs/source/_autosummary

 # PyBuilder
 .pybuilder/
@@ -143,3 +140,13 @@ dmypy.json

 # Cython debug symbols
 cython_debug/
+
+# IDE
+.idea/
+docs/source/primaite-dependencies.rst
+
+# outputs
+src/primaite/outputs/
+
+# benchmark session outputs
+benchmark/output
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,6 +13,9 @@ repos:
    rev: 23.1.0
    hooks:
      - id: black
+        args: [ "--line-length=120" ]
+        additional_dependencies:
+          - jupyter
  - repo: http://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
@@ -22,4 +25,5 @@ repos:
    rev: 6.0.0
    hooks:
      - id: flake8
-        additional_dependencies: [ flake8-docstrings ]
+        additional_dependencies:
+          - flake8-docstrings
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -0,0 +1,90 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+## [2.0.0] - 2023-07-26
+
+### Added
+- Command Line Interface (CLI) for easy access and streamlined usage of PrimAITE.
+- Application Directories to enable PrimAITE as a Python package with predefined directories for storage.
+- Support for Ray RLlib, allowing training of PPO and A2C agents using Stable Baselines3 and Ray RLlib.
+- Random Red Agent to train the blue agent against, with options for randomised Red Agent `POL` and `IER`.
+- Repeatability of sessions through seed settings, and deterministic or stochastic evaluation options.
+- Session loading to revisit previously run sessions for SB3 Agents.
+- Agent Session Classes (`AgentSessionABC` and `HardCodedAgentSessionABC`) to standardise agent training with a common interface.
+- Standardised Session Output in a structured format in the user's app sessions directory, providing four types of outputs:
+  1. Session Metadata
+  2. Results
+  3. Diagrams
+  4. Saved agents (training checkpoints and a final trained agent).
+- Configurable Observation Space managed by the `ObservationHandler` class for a more flexible observation space setup.
+- Benchmarking of PrimAITE performance, showcasing session and step durations for reference.
+- Documentation overhaul, including automatic API and test documentation with recursive Sphinx auto-summary, using the Furo theme for responsive light/dark theme, and enhanced navigation with `sphinx-code-tabs` and `sphinx-copybutton`.
+
+### Changed
+- Action Space updated to discrete spaces, introducing a new `ANY` action space option for combined `NODE` and `ACL` actions.
+- Improved `Node` attribute naming convention for consistency, now adhering to `Pascal Case`.
+- Package Structure has been refactored for better build, distribution, and installation, with all source code now in the `src/` directory, and the `PRIMAITE` Python package renamed to `primaite` to adhere to PEP-8 Package & Module Names.
+- Docs and Tests now sit outside the `src/` directory.
+- Non-python files (example config files, Jupyter notebooks, etc.) now sit inside a `*/_package_data/` directory in their respective sub-packages.
+- All dependencies are now defined in the `pyproject.toml` file.
+- Introduced individual configuration for the number of episodes and time steps for training and evaluation sessions, with separate config values for each.
+- Decoupled the lay down config file from the training config, allowing more flexibility in configuration management.
+- Updated `Transactions` to only report pre-action observation, improving the CSV header and providing more human-readable descriptions for columns relating to observations.
+- Changes to `AccessControlList`, where the `acl` dictionary is now a list to accommodate changes to ACL action space and positioning of `ACLRules` inside the list to signal their level of priority.
+
+
+### Fixed
+- Various bug fixes, including Green IERs separation, correct clearing of links in the reference environment, and proper reward calculation.
+- Logic to check if a node is OFF before executing actions on the node by the blue agent, preventing erroneous state changes.
+- Improved functionality of Resetting a Node, adding "SHUTTING DOWN" and "BOOTING" operating states for more reliable reset commands.
+- Corrected the order of actions in the `Primaite` env to ensure the blue agent uses the current state for decision-making.
+
+## [1.1.1] - 2023-06-27
+
+### Bug Fixes
+* Fixed bug whereby 'reference' environment links reach bandwidth capacity and are never cleared due to green & red IERs being applied to them. This bug had a knock-on effect that meant IERs were being blocked based on the full capacity of links on the reference environment which was not correct; they should only be based on the link capacity of the 'live' environment. This fix has been addressed by:
+  * Implementing a reference copy of all green IERs (`self.green_iers_reference`).
+  * Clearing the traffic on reference IERs at the same time as the live IERs.
+  * Passing the `green_iers_reference` to the `apply_iers` function at the reference stage.
+  * Passing the `green_iers_reference` as an additional argument to `calculate_reward_function`.
+  * Updating the green IERs section of the `calculate_reward_function` to now take into account both the green reference IERs and live IERs. The `green_ier_blocked` reward is only applied if the IER is blocked in the live environment but is running in the reference environment.
+  * Re-ordering the actions taken as part of the step function to ensure the blue action happens first before other changes.
+  * Removing the unnecessary "Reapply PoL and IERs" action from the step function.
+  * Moving the deep-copy of nodes and links to below the "Implement blue action" stage of the step function.
+
+## [1.1.0] - 2023-03-13
+
+### Added
+* The user can now initiate either a TRAINING session or an EVALUATION (test) session with the Stable Baselines 3 (SB3) agents via the config_main.yaml file. During evaluation/testing, the agent policy will be fixed (no longer learning) and subjected to the SB3 `evaluate_policy()` function.
+* The user can choose whether a saved agent is loaded into the session (with reference to a URL) via the `config_main.yaml` file. They specify a Boolean true/false indicating whether a saved agent should be loaded, and specify the URL and file name.
+* Active and Service nodes now possess a new "File System State" attribute. This attribute is permitted to have the states GOOD, CORRUPT, DESTROYED, REPAIRING, and RESTORING. This new feature affects the following components:
+  * Blue agent observation space;
+  * Blue agent action space;
+  * Reward function;
+  * Node pattern-of-life.
+* The Red Agent node pattern-of-life has been enhanced so that node PoL is triggered by an 'initiator'. The initiator is either DIRECT (state change is applied to the node without any conditions), IER (state change is applied to the node based on IER entry condition), or SERVICE (state change is applied to the node based on a service state condition on the same node or a different node within the network).
+* New default config named "config_5_DATA_MANIPULATION.yaml" and associated Training Use Case Profile.
+* NodeStateInstruction has been split into `NodeStateInstructionGreen` and `NodeStateInstructionRed` to reflect the changes within the red agent pattern-of-life capability.
+* The reward function has been enhanced so that node attribute states of resetting, patching, repairing, and restarting contribute to the overall reward value.
+* The User Guide has been updated to reflect all the above changes.
+
+### Changed
+* "config_1_DDOS_BASIC.yaml" modified to make it more simplistic to aid evaluation testing.
+* "config_2_DDOS_BASIC.yaml" updated to reflect the addition of the File System State and the Red Agent node pattern-of-life enhancement.
+* "config_3_DOS_VERY_BASIC.yaml" updated to reflect the addition of the File System State and the Red Agent node pattern-of-life enhancement.
+* "config_UNIT_TEST.yaml" is a copy of the new "config_5_DATA_MANIPULATION.yaml" file.
+* Updates to Transactions.
+
+### Fixed
+* Fixed "config_2_DDOS_BASIC.yaml" by adding another ACL rule to allow traffic to flow from Node 9 to Node 3. Previously, there was no rule, so one of the green IERs could not flow by default.
+
+
+
+[unreleased]: https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/compare/v2.0.0...HEAD
+[2.0.0]: https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/releases/tag/v2.0.0
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 - 2025 Defence Science and Technology Laboratory UK (https://dstl.gov.uk)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +1,2 @@
-include src/primaite/config/*.yaml
+include src/primaite/setup/_package_data/primaite_config.yaml
+include src/primaite/config/_package_data/*.yaml
--- a/README.md
+++ b/README.md
@@ -1 +1,64 @@
 # PrimAITE
+
+## Getting Started with PrimAITE
+
+### Pre-Requisites
+
+In order to get **PrimAITE** installed, you will need to have the following installed:
+
+- `python3.8+`
+- `python3-pip`
+- `virtualenv`
+
+**PrimAITE** is designed to be OS-agnostic, and thus should work on most variations/distros of Linux, Windows, and MacOS.
+
+### Installation from source
+#### 1. Navigate to the PrimAITE folder and create a new python virtual environment (venv)
+
+```unix
+python3 -m venv <name_of_venv>
+```
+
+#### 2. Activate the venv
+
+##### Unix
+```bash
+source <name_of_venv>/bin/activate
+```
+
+##### Windows
+```powershell
+.\<name_of_venv>\Scripts\activate
+```
+
+#### 3. Install `primaite` into the venv along with all of its dependencies
+
+```bash
+python3 -m pip install -e .
+```
+
+### Development Installation
+To install the development dependencies, postfix the command in step 3 above with the `[dev]` extra. Example:
+
+```bash
+python3 -m pip install -e .[dev]
+```
+
+## Building documentation
+The PrimAITE documentation can be built with the following commands:
+
+##### Unix
+```bash
+cd docs
+make html
+```
+
+##### Windows
+```powershell
+cd docs
+.\make.bat html
+```
+
+This will build the documentation as a collection of HTML files which uses the Read The Docs sphinx theme. Other build
+options are available but may require additional dependencies such as LaTeX and PDF. Please refer to the Sphinx documentation
+for your specific output requirements.
--- a/benchmark/config/benchmark_training_config.yaml
+++ b/benchmark/config/benchmark_training_config.yaml
@@ -0,0 +1,164 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+# Training Config File
+
+# Sets which agent algorithm framework will be used.
+# Options are:
+# "SB3" (Stable Baselines3)
+# "RLLIB" (Ray RLlib)
+# "CUSTOM" (Custom Agent)
+agent_framework: SB3
+
+# Sets which deep learning framework will be used (by RLlib ONLY).
+# Default is TF (TensorFlow).
+# Options are:
+# "TF" (TensorFlow)
+# "TF2" (TensorFlow 2.X)
+# "TORCH" (PyTorch)
+deep_learning_framework: TF2
+
+# Sets which Agent class will be used.
+# Options are:
+# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
+# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
+# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
+# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
+# "RANDOM" (primaite.agents.simple.RandomAgent)
+# "DUMMY" (primaite.agents.simple.DummyAgent)
+agent_identifier: PPO
+
+# Sets whether the Red Agent POL and IER are randomised.
+# Options are:
+# True
+# False
+random_red_agent: False
+
+# The (integer) seed to be used in random number generation
+# Default is None (null)
+seed: null
+
+# Set whether the agent will be deterministic instead of stochastic
+# Options are:
+# True
+# False
+deterministic: False
+
+# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
+# Options are:
+# "BASIC" (The current observation space only)
+# "FULL" (Full environment view with actions taken and reward feedback)
+hard_coded_agent_view: FULL
+
+# Sets how the action space is defined.
+# Options are:
+# "NODE"
+# "ACL"
+# "ANY" (Combined NODE and ACL actions)
+action_type: NODE
+
+# observation space
+observation_space:
+  flatten: true
+  components:
+    - name: NODE_LINK_TABLE
+    - name: NODE_STATUSES
+    - name: LINK_TRAFFIC_LEVELS
+
+# Number of episodes for training to run per session
+num_train_episodes: 500
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 1
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
+
+# Sets how often the agent will save a checkpoint (every n episodes).
+# Set to 0 if no checkpoints are required. Default is 10.
+checkpoint_every_n_episodes: 0
+
+# Time delay (milliseconds) between steps for CUSTOM agents.
+time_delay: 5
+
+# Type of session to be run. Options are:
+# "TRAIN" (Trains an agent)
+# "EVAL" (Evaluates an agent)
+# "TRAIN_EVAL" (Trains then evaluates an agent)
+session_type: TRAIN
+
+# Environment config values
+# The high value for the observation space
+observation_space_high_value: 1000000000
+
+# The Stable Baselines3 learn/eval output verbosity level:
+# Options are:
+# "NONE" (No Output)
+# "INFO" (Info Messages (such as devices and wrappers used))
+# "DEBUG" (All Messages)
+sb3_output_verbose_level: NONE
+
+# Reward values
+# Generic
+all_ok: 0
+# Node Hardware State
+off_should_be_on: -0.001
+off_should_be_resetting: -0.0005
+on_should_be_off: -0.0002
+on_should_be_resetting: -0.0005
+resetting_should_be_on: -0.0005
+resetting_should_be_off: -0.0002
+resetting: -0.0003
+# Node Software or Service State
+good_should_be_patching: 0.0002
+good_should_be_compromised: 0.0005
+good_should_be_overwhelmed: 0.0005
+patching_should_be_good: -0.0005
+patching_should_be_compromised: 0.0002
+patching_should_be_overwhelmed: 0.0002
+patching: -0.0003
+compromised_should_be_good: -0.002
+compromised_should_be_patching: -0.002
+compromised_should_be_overwhelmed: -0.002
+compromised: -0.002
+overwhelmed_should_be_good: -0.002
+overwhelmed_should_be_patching: -0.002
+overwhelmed_should_be_compromised: -0.002
+overwhelmed: -0.002
+# Node File System State
+good_should_be_repairing: 0.0002
+good_should_be_restoring: 0.0002
+good_should_be_corrupt: 0.0005
+good_should_be_destroyed: 0.001
+repairing_should_be_good: -0.0005
+repairing_should_be_restoring: 0.0002
+repairing_should_be_corrupt: 0.0002
+repairing_should_be_destroyed: 0.0000
+repairing: -0.0003
+restoring_should_be_good: -0.001
+restoring_should_be_repairing: -0.0002
+restoring_should_be_corrupt: 0.0001
+restoring_should_be_destroyed: 0.0002
+restoring: -0.0006
+corrupt_should_be_good: -0.001
+corrupt_should_be_repairing: -0.001
+corrupt_should_be_restoring: -0.001
+corrupt_should_be_destroyed: 0.0002
+corrupt: -0.001
+destroyed_should_be_good: -0.002
+destroyed_should_be_repairing: -0.002
+destroyed_should_be_restoring: -0.002
+destroyed_should_be_corrupt: -0.002
+destroyed: -0.002
+scanning: -0.0002
+# IER status
+red_ier_running: -0.0005
+green_ier_blocked: -0.001
+
+# Patching / Reset durations
+os_patching_duration: 5            # The time taken to patch the OS
+node_reset_duration: 5             # The time taken to reset a node (hardware)
+service_patching_duration: 5       # The time taken to patch a service
+file_system_repairing_limit: 5      # The time taken to repair the file system
+file_system_restoring_limit: 5      # The time taken to restore the file system
+file_system_scanning_limit: 5       # The time taken to scan the file system
--- a/benchmark/primaite_benchmark.py
+++ b/benchmark/primaite_benchmark.py
@@ -0,0 +1,449 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+import json
+import platform
+import shutil
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, Final, Optional, Tuple, Union
+from unittest.mock import patch
+
+import GPUtil
+import plotly.graph_objects as go
+import polars as pl
+import psutil
+import yaml
+from plotly.graph_objs import Figure
+from pylatex import Command, Document
+from pylatex import Figure as LatexFigure
+from pylatex import Section, Subsection, Tabular
+from pylatex.utils import bold
+
+import primaite
+from primaite.config.lay_down_config import data_manipulation_config_path
+from primaite.data_viz.session_plots import get_plotly_config
+from primaite.environment.primaite_env import Primaite
+from primaite.primaite_session import PrimaiteSession
+
+# Module-level logger obtained from the primaite package
+_LOGGER = primaite.getLogger(__name__)
+
+# Directory that contains this benchmark script
+_BENCHMARK_ROOT = Path(__file__).parent
+# Results directory; created (including parents) at import time if it does not already exist
+_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results"
+_RESULTS_ROOT.mkdir(exist_ok=True, parents=True)
+
+# Output directory; note that any existing contents are deleted every time this module is imported
+_OUTPUT_ROOT: Final[Path] = _BENCHMARK_ROOT / "output"
+# Clear and recreate the output directory
+if _OUTPUT_ROOT.exists():
+    shutil.rmtree(_OUTPUT_ROOT)
+_OUTPUT_ROOT.mkdir()
+
+# Training config that sits in the "config" directory next to this script
+_TRAINING_CONFIG_PATH = _BENCHMARK_ROOT / "config" / "benchmark_training_config.yaml"
+# Lay down config path as returned by primaite.config.lay_down_config.data_manipulation_config_path
+_LAY_DOWN_CONFIG_PATH = data_manipulation_config_path()
+
+
+def get_size(size_bytes: int) -> str:
+    """
+    Scale a byte count to a human-readable string using 1024-based units.
+
+    e.g:
+        1253656 => '1.20MB'
+        1253656678 => '1.17GB'
+
+    :param size_bytes: The size in bytes.
+    :return: The size formatted to two decimal places followed by its unit
+        (B, KB, MB, GB, TB, PB or EB).
+    """
+    factor = 1024
+    for unit in ["", "K", "M", "G", "T", "P"]:
+        if size_bytes < factor:
+            return f"{size_bytes:.2f}{unit}B"
+        size_bytes /= factor
+    # Values of 1024PB or more exhaust the unit list above. By this point the value has been divided
+    # by 1024 six times, so it is expressed in exabytes; return it rather than falling through to None.
+    return f"{size_bytes:.2f}EB"
+
+
+def _get_system_info() -> Dict:
+    """
+    Builds and returns a dict containing system info.
+
+    :return: A dict with the keys "System", "CPU" and "Memory" (each a dict of labelled values) and
+        "GPU" (a list with one dict per GPU reported by GPUtil; empty when none are reported).
+    """
+    uname = platform.uname()
+    cpu_freq = psutil.cpu_freq()
+    # psutil.cpu_freq() returns None when the frequency cannot be determined, so guard the attribute
+    # access instead of letting the whole system-info build fail with an AttributeError.
+    max_frequency = f"{cpu_freq.max:.2f}Mhz" if cpu_freq is not None else "Unknown"
+    virtual_mem = psutil.virtual_memory()
+    swap_mem = psutil.swap_memory()
+    gpus = GPUtil.getGPUs()
+    return {
+        "System": {
+            "OS": uname.system,
+            "OS Version": uname.version,
+            "Machine": uname.machine,
+            "Processor": uname.processor,
+        },
+        "CPU": {
+            "Physical Cores": psutil.cpu_count(logical=False),
+            "Total Cores": psutil.cpu_count(logical=True),
+            "Max Frequency": max_frequency,
+        },
+        "Memory": {"Total": get_size(virtual_mem.total), "Swap Total": get_size(swap_mem.total)},
+        "GPU": [{"Name": gpu.name, "Total Memory": f"{gpu.memoryTotal}MB"} for gpu in gpus],
+    }
+
+
+def _build_benchmark_latex_report(
+    benchmark_metadata_dict: Dict, this_version_plot_path: Path, all_version_plot_path: Path
+) -> None:
+    """
+    Build the benchmark report with pylatex and generate it as a PDF.
+
+    The report contains an introduction, system information tables, a benchmark stats table and the two
+    learning benchmark plots. The PDF is generated at ``this_version_plot_path`` with ".png" removed from
+    the path.
+
+    :param benchmark_metadata_dict: The benchmark metadata. Must contain the keys "primaite_version",
+        "total_sessions", "total_episodes", "total_time_steps", "av_s_per_session", "av_s_per_step",
+        "av_s_per_100_steps_10_nodes", "system_info", and "training_config" (which itself contains
+        "num_train_episodes" and "num_train_steps").
+    :param this_version_plot_path: Path of the plot image for this PrimAITE version.
+    :param all_version_plot_path: Path of the plot image covering all PrimAITE versions.
+    """
+    geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"}
+    data = benchmark_metadata_dict
+    primaite_version = data["primaite_version"]
+
+    # Create a new document
+    doc = Document("report", geometry_options=geometry_options)
+    # Title
+    doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark"))
+    doc.preamble.append(Command("author", "PrimAITE Dev Team"))
+    doc.preamble.append(Command("date", datetime.now().date()))
+    doc.append(Command("maketitle"))
+
+    sessions = data["total_sessions"]
+    episodes = data["training_config"]["num_train_episodes"]
+    steps = data["training_config"]["num_train_steps"]
+
+    # Body
+    with doc.create(Section("Introduction")):
+        doc.append(
+            f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics "
+            f"were captured to be referenced during system-level testing and user acceptance testing (UAT)."
+        )
+        doc.append(
+            f"\nThe benchmarking process consists of running {sessions} training session using the same "
+            f"training and lay down config files. Each session trains an agent for {episodes} episodes, "
+            f"with each episode consisting of {steps} steps."
+        )
+        doc.append(
+            f"\nThe mean reward per episode from each session is captured. This is then used to calculate a "
+            f"combined average reward per episode from the {sessions} individual sessions for smoothing. "
+            f"Finally, a 25-window rolling average of the combined average reward per session is calculated for "
+            f"further smoothing."
+        )
+
+    with doc.create(Section("System Information")):
+        with doc.create(Subsection("Python")):
+            with doc.create(Tabular("|l|l|")) as table:
+                table.add_hline()
+                table.add_row((bold("Version"), sys.version))
+                table.add_hline()
+        for section, section_data in data["system_info"].items():
+            # Empty sections (e.g. an empty GPU list) are left out of the report
+            if section_data:
+                with doc.create(Subsection(section)):
+                    if isinstance(section_data, dict):
+                        # A dict becomes a two-column key/value table
+                        with doc.create(Tabular("|l|l|")) as table:
+                            table.add_hline()
+                            for key, value in section_data.items():
+                                table.add_row((bold(key), value))
+                                table.add_hline()
+                    elif isinstance(section_data, list):
+                        # A list of dicts becomes one row per item, with the first item's keys as headers
+                        headers = section_data[0].keys()
+                        tabs_str = "|".join(["l" for _ in range(len(headers))])
+                        tabs_str = f"|{tabs_str}|"
+                        with doc.create(Tabular(tabs_str)) as table:
+                            table.add_hline()
+                            table.add_row([bold(h) for h in headers])
+                            table.add_hline()
+                            for item in section_data:
+                                table.add_row(item.values())
+                                table.add_hline()
+
+    headers_map = {
+        "total_sessions": "Total Sessions",
+        "total_episodes": "Total Episodes",
+        "total_time_steps": "Total Steps",
+        "av_s_per_session": "Av Session Duration (s)",
+        "av_s_per_step": "Av Step Duration (s)",
+        "av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
+    }
+    with doc.create(Section("Stats")):
+        with doc.create(Subsection("Benchmark Results")):
+            with doc.create(Tabular("|l|l|")) as table:
+                table.add_hline()
+                for section, header in headers_map.items():
+                    # Averages are shown to four decimal places; totals are shown as they are
+                    if section.startswith("av_"):
+                        table.add_row((bold(header), f"{data[section]:.4f}"))
+                    else:
+                        table.add_row((bold(header), data[section]))
+                    table.add_hline()
+
+    with doc.create(Section("Graphs")):
+        with doc.create(Subsection(f"PrimAITE {primaite_version} Learning Benchmark Plot")):
+            with doc.create(LatexFigure(position="h!")) as pic:
+                pic.add_image(str(this_version_plot_path))
+                pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot")
+
+        with doc.create(Subsection("PrimAITE All Versions Learning Benchmark Plot")):
+            with doc.create(LatexFigure(position="h!")) as pic:
+                pic.add_image(str(all_version_plot_path))
+                pic.add_caption("PrimAITE All Versions Learning Benchmark Plot")
+
+    # The PDF path is the plot path with ".png" removed
+    doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True)
+
+
+class BenchmarkPrimaiteSession(PrimaiteSession):
+    """Primaite session specialised for benchmarking runs."""
+
+    def __init__(
+        self,
+        training_config_path: Union[str, Path],
+        lay_down_config_path: Union[str, Path],
+    ):
+        """
+        Initialise the parent session from the two config paths, then call ``setup()`` straight away.
+
+        :param training_config_path: Path to the training config file.
+        :param lay_down_config_path: Path to the lay down config file.
+        """
+        super().__init__(training_config_path, lay_down_config_path)
+        self.setup()
+
+    @property
+    def env(self) -> Primaite:
+        """The env held by the underlying agent session, exposed for ease of testing."""
+        agent_session = self._agent_session
+        return agent_session._env  # noqa
+
+    def __enter__(self):
+        """Enter the context manager, giving back this session."""
+        return self
+
+    def __exit__(self, type, value, tb):
+        """Leave the context manager, deleting the session directory and logging the deletion."""
+        shutil.rmtree(self.session_path)
+        _LOGGER.debug(f"Deleted benchmark session directory: {self.session_path}")
+
+    def _learn_benchmark_durations(self) -> Tuple[float, float, float]:
+        """
+        Work out the learning benchmark durations from the session metadata file.
+
+        Three durations are produced, all in seconds:
+        - The total learning time
+        - The learning time per time step
+        - The learning time per 100 time steps per 10 nodes
+
+        :return: A Tuple of three floats:
+            Tuple[total_s, s_per_step, s_per_100_steps_10_nodes].
+        """
+        metadata = self.metadata_file_as_dict()
+        started_at = datetime.fromisoformat(metadata["start_datetime"])
+        ended_at = datetime.fromisoformat(metadata["end_datetime"])
+        total_s = (ended_at - started_at).total_seconds()
+
+        total_steps = metadata["learning"]["total_time_steps"]
+        s_per_step = total_s / total_steps
+
+        node_count = self.env.num_nodes
+        # Average time for a block of 100 steps...
+        s_per_100_steps = total_s / (total_steps / 100)
+        # ...scaled by the number of nodes expressed in units of 10 nodes
+        s_per_100_steps_10_nodes = s_per_100_steps / (node_count / 10)
+
+        return total_s, s_per_step, s_per_100_steps_10_nodes
+
+    def learn_metadata_dict(self) -> Dict[str, Any]:
+        """Learning session metadata, including the benchmark durations."""
+        durations = self._learn_benchmark_durations()
+        metadata: Dict[str, Any] = {}
+        metadata["total_episodes"] = self.env.actual_episode_count
+        metadata["total_time_steps"] = self.env.total_step_count
+        metadata["total_s"] = durations[0]
+        metadata["s_per_step"] = durations[1]
+        metadata["s_per_100_steps_10_nodes"] = durations[2]
+        metadata["av_reward_per_episode"] = self.learn_av_reward_per_episode_dict()
+        return metadata
+
+
+def _get_benchmark_session_path(session_timestamp: datetime) -> Path:
+    return _OUTPUT_ROOT / session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
+
+
+def _get_benchmark_primaite_session() -> BenchmarkPrimaiteSession:
+    with patch("primaite.agents.agent_abc.get_session_path", _get_benchmark_session_path) as mck:
+        mck.session_timestamp = datetime.now()
+        return BenchmarkPrimaiteSession(_TRAINING_CONFIG_PATH, _LAY_DOWN_CONFIG_PATH)
+
+
+def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict) -> dict:
+    n = len(metadata_dict)
+    with open(_TRAINING_CONFIG_PATH, "r") as file:
+        training_config_dict = yaml.safe_load(file)
+    with open(_LAY_DOWN_CONFIG_PATH, "r") as file:
+        lay_down_config_dict = yaml.safe_load(file)
+    averaged_data = {
+        "start_timestamp": start_datetime.isoformat(),
+        "end_datetime": datetime.now().isoformat(),
+        "primaite_version": primaite.__version__,
+        "system_info": _get_system_info(),
+        "total_sessions": n,
+        "total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()),
+        "total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()),
+        "av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / n,
+        "av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / n,
+        "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) / n,
+        "combined_av_reward_per_episode": {},
+        "session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()},
+        "training_config": training_config_dict,
+        "lay_down_config": lay_down_config_dict,
+    }
+
+    episodes = metadata_dict[1]["av_reward_per_episode"].keys()
+
+    for episode in episodes:
+        combined_av_reward = sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / n
+        averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward
+
+    return averaged_data
+
+
+def _get_df_from_episode_av_reward_dict(data: Dict):
+    data: Dict = {"episode": data.keys(), "av_reward": data.values()}
+
+    return (
+        pl.from_dict(data)
+        .with_columns(rolling_mean=pl.col("av_reward").rolling_mean(window_size=25))
+        .rename({"rolling_mean": "rolling_av_reward"})
+    )
+
+
+def _plot_benchmark_metadata(
+    benchmark_metadata_dict: Dict,
+    title: Optional[str] = None,
+    subtitle: Optional[str] = None,
+) -> Figure:
+    if title:
+        if subtitle:
+            title = f"{title} <br>{subtitle}</sup>"
+    else:
+        if subtitle:
+            title = subtitle
+
+    config = get_plotly_config()
+    layout = go.Layout(
+        autosize=config["size"]["auto_size"],
+        width=config["size"]["width"],
+        height=config["size"]["height"],
+    )
+    # Create the line graph with a colored line
+    fig = go.Figure(layout=layout)
+    fig.update_layout(template=config["template"])
+
+    for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items():
+        df = _get_df_from_episode_av_reward_dict(av_reward_dict)
+        fig.add_trace(
+            go.Scatter(
+                x=df["episode"],
+                y=df["av_reward"],
+                mode="lines",
+                name=f"Session {session}",
+                opacity=0.25,
+                line={"color": "#a6a6a6"},
+            )
+        )
+
+    df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"])
+    fig.add_trace(
+        go.Scatter(
+            x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"}
+        )
+    )
+
+    fig.add_trace(
+        go.Scatter(
+            x=df["episode"],
+            y=df["rolling_av_reward"],
+            mode="lines",
+            name="Rolling Av (Combined Session Av)",
+            line={"color": "#4CBB17"},
+        )
+    )
+
+    # Set the layout of the graph
+    fig.update_layout(
+        xaxis={
+            "title": "Episode",
+            "type": "linear",
+        },
+        yaxis={"title": "Average Reward"},
+        title=title,
+    )
+
+    return fig
+
+
+def _plot_all_benchmarks_combined_session_av():
+    """
+    Plot the Benchmark results for each released version of PrimAITE.
+
+    Does this by iterating over the ``benchmark/results`` directory and
+    extracting the benchmark metadata json for each version that has been
+    benchmarked. The combined_av_reward_per_episode is extracted from each,
+    converted into a polars dataframe, and plotted as a scatter line in plotly.
+    """
+    title = "PrimAITE Versions Learning Benchmark"
+    subtitle = "Rolling Av (Combined Session Av)"
+    if title:
+        if subtitle:
+            title = f"{title} <br>{subtitle}</sup>"
+    else:
+        if subtitle:
+            title = subtitle
+    config = get_plotly_config()
+    layout = go.Layout(
+        autosize=config["size"]["auto_size"],
+        width=config["size"]["width"],
+        height=config["size"]["height"],
+    )
+    # Create the line graph with a colored line
+    fig = go.Figure(layout=layout)
+    fig.update_layout(template=config["template"])
+
+    for dir in _RESULTS_ROOT.iterdir():
+        if dir.is_dir():
+            metadata_file = dir / f"{dir.name}_benchmark_metadata.json"
+            with open(metadata_file, "r") as file:
+                metadata_dict = json.load(file)
+            df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"])
+
+            fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name))
+
+    # Set the layout of the graph
+    fig.update_layout(
+        xaxis={
+            "title": "Episode",
+            "type": "linear",
+        },
+        yaxis={"title": "Average Reward"},
+        title=title,
+    )
+    fig["data"][0]["showlegend"] = True
+
+    return fig
+
+
+def run():
+    """Run the PrimAITE benchmark."""
+    start_datetime = datetime.now()
+    av_reward_per_episode_dicts = {}
+    for i in range(1, 11):
+        print(f"Starting Benchmark Session: {i}")
+        with _get_benchmark_primaite_session() as session:
+            session.learn()
+            av_reward_per_episode_dicts[i] = session.learn_metadata_dict()
+
+    benchmark_metadata = _build_benchmark_results_dict(
+        start_datetime=start_datetime, metadata_dict=av_reward_per_episode_dicts
+    )
+    v_str = f"v{primaite.__version__}"
+
+    version_result_dir = _RESULTS_ROOT / v_str
+    if version_result_dir.exists():
+        shutil.rmtree(version_result_dir)
+    version_result_dir.mkdir(exist_ok=True, parents=True)
+
+    with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file:
+        json.dump(benchmark_metadata, file, indent=4)
+    title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark"
+    fig = _plot_benchmark_metadata(benchmark_metadata, title=title)
+    this_version_plot_path = version_result_dir / f"{title}.png"
+    fig.write_image(this_version_plot_path)
+
+    fig = _plot_all_benchmarks_combined_session_av()
+
+    all_version_plot_path = _RESULTS_ROOT / "PrimAITE Versions Learning Benchmark.png"
+    fig.write_image(all_version_plot_path)
+
+    _build_benchmark_latex_report(benchmark_metadata, this_version_plot_path, all_version_plot_path)
+
+
+if __name__ == "__main__":
+    run()
--- a/benchmark/results/PrimAITE
+++ b/benchmark/results/PrimAITE
--- a/benchmark/results/v2.0.0/PrimAITE
+++ b/benchmark/results/v2.0.0/PrimAITE
--- a/benchmark/results/v2.0.0/PrimAITE
+++ b/benchmark/results/v2.0.0/PrimAITE
--- a/benchmark/results/v2.0.0/v2.0.0_benchmark_metadata.json
+++ b/benchmark/results/v2.0.0/v2.0.0_benchmark_metadata.json
--- a/diagram/classes.puml
+++ b/diagram/classes.puml
@@ -0,0 +1,521 @@
+@startuml classes
+set namespaceSeparator none
+class "ACLRule" as primaite.acl.acl_rule.ACLRule {
+  dest_ip : str
+  permission
+  port : str
+  protocol : str
+  source_ip : str
+  get_dest_ip() -> str
+  get_permission() -> str
+  get_port() -> str
+  get_protocol() -> str
+  get_source_ip() -> str
+}
+class "AbstractObservationComponent" as primaite.environment.observations.AbstractObservationComponent {
+  current_observation : NotImplementedType, ndarray
+  env : str
+  space : Space
+  structure : List[str]
+  {abstract}generate_structure() -> List[str]
+  {abstract}update() -> None
+}
+class "AccessControlList" as primaite.acl.access_control_list.AccessControlList {
+  acl
+  acl_implicit_permission
+  acl_implicit_rule
+  max_acl_rules : int
+  add_rule(_permission: RulePermissionType, _source_ip: str, _dest_ip: str, _protocol: str, _port: str, _position: str) -> None
+  check_address_match(_rule: ACLRule, _source_ip_address: str, _dest_ip_address: str) -> bool
+  get_dictionary_hash(_permission: RulePermissionType, _source_ip: str, _dest_ip: str, _protocol: str, _port: str) -> int
+  get_relevant_rules(_source_ip_address: str, _dest_ip_address: str, _protocol: str, _port: str) -> Dict[int, ACLRule]
+  is_blocked(_source_ip_address: str, _dest_ip_address: str, _protocol: str, _port: str) -> bool
+  remove_all_rules() -> None
+  remove_rule(_permission: RulePermissionType, _source_ip: str, _dest_ip: str, _protocol: str, _port: str) -> None
+}
+class "AccessControlList_" as primaite.environment.observations.AccessControlList_ {
+  current_observation : ndarray
+  space : MultiDiscrete
+  structure : list
+  generate_structure() -> List[str]
+  update() -> None
+}
+
+class "ActiveNode" as primaite.nodes.active_node.ActiveNode {
+  file_system_action_count : int
+  file_system_scanning : bool
+  file_system_scanning_count : int
+  file_system_state_actual : GOOD
+  file_system_state_observed : REPAIRING, RESTORING, GOOD
+  ip_address : str
+  patching_count : int
+  software_state
+  software_state : GOOD
+  set_file_system_state(file_system_state: FileSystemState) -> None
+  set_file_system_state_if_not_compromised(file_system_state: FileSystemState) -> None
+  set_software_state_if_not_compromised(software_state: SoftwareState) -> None
+  start_file_system_scan() -> None
+  update_booting_status() -> None
+  update_file_system_state() -> None
+  update_os_patching_status() -> None
+  update_resetting_status() -> None
+}
+class "AgentSessionABC" as primaite.agents.agent_abc.AgentSessionABC {
+  checkpoints_path
+  evaluation_path
+  is_eval : bool
+  learning_path
+  sb3_output_verbose_level : NONE
+  session_path : Union[str, Path]
+  session_timestamp : datetime
+  timestamp_str
+  uuid
+  close() -> None
+  {abstract}evaluate() -> None
+  {abstract}export() -> None
+  {abstract}learn() -> None
+  load(path: Union[str, Path]) -> None
+  {abstract}save() -> None
+}
+
+class "DoNothingACLAgent" as primaite.agents.simple.DoNothingACLAgent {
+}
+class "DoNothingNodeAgent" as primaite.agents.simple.DoNothingNodeAgent {
+}
+class "DummyAgent" as primaite.agents.simple.DummyAgent {
+}
+class "HardCodedACLAgent" as primaite.agents.hardcoded_acl.HardCodedACLAgent {
+  get_allow_acl_rules(source_node_id: int, dest_node_id: str, protocol: int, port: str, acl: AccessControlList, nodes: Dict[str, NodeUnion], services_list: List[str]) -> Dict[int, ACLRule]
+  get_allow_acl_rules_for_ier(ier: IER, acl: AccessControlList, nodes: Dict[str, NodeUnion]) -> Dict[int, ACLRule]
+  get_blocked_green_iers(green_iers: Dict[str, IER], acl: AccessControlList, nodes: Dict[str, NodeUnion]) -> Dict[str, IER]
+  get_blocking_acl_rules_for_ier(ier: IER, acl: AccessControlList, nodes: Dict[str, NodeUnion]) -> Dict[int, ACLRule]
+  get_deny_acl_rules(source_node_id: int, dest_node_id: str, protocol: int, port: str, acl: AccessControlList, nodes: Dict[str, NodeUnion], services_list: List[str]) -> Dict[int, ACLRule]
+  get_matching_acl_rules(source_node_id: str, dest_node_id: str, protocol: str, port: str, acl: AccessControlList, nodes: Dict[str, Union[ServiceNode, ActiveNode]], services_list: List[str]) -> Dict[int, ACLRule]
+  get_matching_acl_rules_for_ier(ier: IER, acl: AccessControlList, nodes: Dict[str, NodeUnion]) -> Dict[int, ACLRule]
+}
+class "HardCodedAgentSessionABC" as primaite.agents.hardcoded_abc.HardCodedAgentSessionABC {
+  is_eval : bool
+  evaluate() -> None
+  export() -> None
+  learn() -> None
+  load(path: Union[str, Path]) -> None
+  save() -> None
+}
+class "HardCodedNodeAgent" as primaite.agents.hardcoded_node.HardCodedNodeAgent {
+}
+class "IER" as primaite.pol.ier.IER {
+  dest_node_id : str
+  end_step : int
+  id : str
+  load : int
+  mission_criticality : int
+  port : str
+  protocol : str
+  running : bool
+  source_node_id : str
+  start_step : int
+  get_dest_node_id() -> str
+  get_end_step() -> int
+  get_id() -> str
+  get_is_running() -> bool
+  get_load() -> int
+  get_mission_criticality() -> int
+  get_port() -> str
+  get_protocol() -> str
+  get_source_node_id() -> str
+  get_start_step() -> int
+  set_is_running(_value: bool) -> None
+}
+class "Link" as primaite.links.link.Link {
+  bandwidth : int
+  dest_node_name : str
+  id : str
+  protocol_list : List[Protocol]
+  source_node_name : str
+  add_protocol(_protocol: str) -> None
+  add_protocol_load(_protocol: str, _load: int) -> None
+  clear_traffic() -> None
+  get_bandwidth() -> int
+  get_current_load() -> int
+  get_dest_node_name() -> str
+  get_id() -> str
+  get_protocol_list() -> List[Protocol]
+  get_source_node_name() -> str
+}
+class "LinkTrafficLevels" as primaite.environment.observations.LinkTrafficLevels {
+  current_observation : ndarray
+  space : MultiDiscrete
+  structure : list
+  generate_structure() -> List[str]
+  update() -> None
+}
+class "Node" as primaite.nodes.node.Node {
+  booting_count : int
+  config_values
+  hardware_state : BOOTING, ON, RESETTING, OFF
+  name : Final[str]
+  node_id : Final[str]
+  node_type : Final[NodeType]
+  priority
+  resetting_count : int
+  shutting_down_count : int
+  reset() -> None
+  turn_off() -> None
+  turn_on() -> None
+  update_booting_status() -> None
+  update_resetting_status() -> None
+  update_shutdown_status() -> None
+}
+class "NodeLinkTable" as primaite.environment.observations.NodeLinkTable {
+  current_observation : ndarray
+  space : Box
+  structure : list
+  generate_structure() -> List[str]
+  update() -> None
+}
+class "NodeStateInstructionGreen" as primaite.nodes.node_state_instruction_green.NodeStateInstructionGreen {
+  end_step : int
+  id : str
+  node_id : str
+  node_pol_type : str
+  service_name : str
+  start_step : int
+  state : Union['HardwareState', 'SoftwareState', 'FileSystemState']
+  get_end_step() -> int
+  get_node_id() -> str
+  get_node_pol_type() -> 'NodePOLType'
+  get_service_name() -> str
+  get_start_step() -> int
+  get_state() -> Union['HardwareState', 'SoftwareState', 'FileSystemState']
+}
+class "NodeStateInstructionRed" as primaite.nodes.node_state_instruction_red.NodeStateInstructionRed {
+  end_step : int
+  id : str
+  initiator : str
+  pol_type
+  service_name : str
+  source_node_id : str
+  source_node_service : str
+  source_node_service_state : str
+  start_step : int
+  state : Union['HardwareState', 'SoftwareState', 'FileSystemState']
+  target_node_id : str
+  get_end_step() -> int
+  get_initiator() -> 'NodePOLInitiator'
+  get_pol_type() -> NodePOLType
+  get_service_name() -> str
+  get_source_node_id() -> str
+  get_source_node_service() -> str
+  get_source_node_service_state() -> str
+  get_start_step() -> int
+  get_state() -> Union['HardwareState', 'SoftwareState', 'FileSystemState']
+  get_target_node_id() -> str
+}
+class "NodeStatuses" as primaite.environment.observations.NodeStatuses {
+  current_observation : ndarray
+  space : MultiDiscrete
+  structure : list
+  generate_structure() -> List[str]
+  update() -> None
+}
+class "ObservationsHandler" as primaite.environment.observations.ObservationsHandler {
+  current_observation
+  registered_obs_components : List[AbstractObservationComponent]
+  space
+  deregister(obs_component: AbstractObservationComponent) -> None
+  describe_structure() -> List[str]
+  from_config(env: 'Primaite', obs_space_config: dict) -> 'ObservationsHandler'
+  register(obs_component: AbstractObservationComponent) -> None
+  update_obs() -> None
+  update_space() -> None
+}
+class "PassiveNode" as primaite.nodes.passive_node.PassiveNode {
+  ip_address
+}
+class "Primaite" as primaite.environment.primaite_env.Primaite {
+  ACTION_SPACE_ACL_ACTION_VALUES : int
+  ACTION_SPACE_ACL_PERMISSION_VALUES : int
+  ACTION_SPACE_NODE_ACTION_VALUES : int
+  ACTION_SPACE_NODE_PROPERTY_VALUES : int
+  acl
+  action_dict : dict, Dict[int, List[int]]
+  action_space : Discrete, Space
+  action_type : int
+  actual_episode_count
+  agent_identifier
+  average_reward : float
+  env_obs : ndarray, tuple
+  episode_av_reward_writer
+  episode_count : int
+  episode_steps : int
+  green_iers : Dict[str, IER]
+  green_iers_reference : Dict[str, IER]
+  lay_down_config
+  links : Dict[str, Link]
+  links_post_blue : dict
+  links_post_pol : dict
+  links_post_red : dict
+  links_reference : Dict[str, Link]
+  max_number_acl_rules : int
+  network : Graph
+  network_reference : Graph
+  node_pol : Dict[str, NodeStateInstructionGreen]
+  nodes : Dict[str, NodeUnion]
+  nodes_post_blue : dict
+  nodes_post_pol : dict
+  nodes_post_red : dict
+  nodes_reference : Dict[str, NodeUnion]
+  num_links : int
+  num_nodes : int
+  num_ports : int
+  num_services : int
+  obs_config : dict
+  obs_handler
+  observation_space : Tuple, Box, Space
+  observation_type
+  ports_list : List[str]
+  red_iers : Dict[str, IER], dict
+  red_node_pol : dict, Dict[str, NodeStateInstructionRed]
+  services_list : List[str]
+  session_path : Final[Path]
+  step_count : int
+  step_info : Dict[Any]
+  timestamp_str : Final[str]
+  total_reward : float
+  total_step_count : int
+  training_config
+  transaction_writer
+  apply_actions_to_acl(_action: int) -> None
+  apply_actions_to_nodes(_action: int) -> None
+  apply_time_based_updates() -> None
+  close() -> None
+  create_acl_action_dict() -> Dict[int, List[int]]
+  create_acl_rule(item: Dict) -> None
+  create_green_ier(item: Dict) -> None
+  create_green_pol(item: Dict) -> None
+  create_link(item: Dict) -> None
+  create_node(item: Dict) -> None
+  create_node_action_dict() -> Dict[int, List[int]]
+  create_node_and_acl_action_dict() -> Dict[int, List[int]]
+  create_ports_list(ports: Dict) -> None
+  create_red_ier(item: Dict) -> None
+  create_red_pol(item: Dict) -> None
+  create_services_list(services: Dict) -> None
+  get_action_info(action_info: Dict) -> None
+  get_observation_info(observation_info: Dict) -> None
+  init_acl() -> None
+  init_observations() -> Tuple[spaces.Space, np.ndarray]
+  interpret_action_and_apply(_action: int) -> None
+  load_lay_down_config() -> None
+  output_link_status() -> None
+  reset() -> np.ndarray
+  reset_environment() -> None
+  reset_node(item: Dict) -> None
+  save_obs_config(obs_config: dict) -> None
+  set_as_eval() -> None
+  step(action: int) -> Tuple[np.ndarray, float, bool, Dict]
+  update_environent_obs() -> None
+}
+class "PrimaiteSession" as primaite.primaite_session.PrimaiteSession {
+  evaluation_path : Optional[Path], Path
+  is_load_session : bool
+  learning_path : Optional[Path], Path
+  session_path : Optional[Path], Path
+  timestamp_str : str, Optional[str]
+  close() -> None
+  evaluate() -> None
+  learn() -> None
+  setup() -> None
+}
+class "Protocol" as primaite.common.protocol.Protocol {
+  load : int
+  name : str
+  add_load(_load: int) -> None
+  clear_load() -> None
+  get_load() -> int
+  get_name() -> str
+}
+class "RLlibAgent" as primaite.agents.rllib.RLlibAgent {
+  {abstract}evaluate() -> None
+  {abstract}export() -> None
+  learn() -> None
+  {abstract}load(path: Union[str, Path]) -> RLlibAgent
+  save(overwrite_existing: bool) -> None
+}
+class "RandomAgent" as primaite.agents.simple.RandomAgent {
+}
+class "SB3Agent" as primaite.agents.sb3.SB3Agent {
+  is_eval : bool
+  evaluate() -> None
+  {abstract}export() -> None
+  learn() -> None
+  save() -> None
+}
+class "Service" as primaite.common.service.Service {
+  name : str
+  patching_count : int
+  port : str
+  software_state : GOOD
+  reduce_patching_count() -> None
+}
+class "ServiceNode" as primaite.nodes.service_node.ServiceNode {
+  services : Dict[str, Service]
+  add_service(service: Service) -> None
+  get_service_state(protocol_name: str) -> SoftwareState
+  has_service(protocol_name: str) -> bool
+  service_is_overwhelmed(protocol_name: str) -> bool
+  service_running(protocol_name: str) -> bool
+  set_service_state(protocol_name: str, software_state: SoftwareState) -> None
+  set_service_state_if_not_compromised(protocol_name: str, software_state: SoftwareState) -> None
+  update_booting_status() -> None
+  update_resetting_status() -> None
+  update_services_patching_status() -> None
+}
+class "SessionOutputWriter" as primaite.utils.session_output_writer.SessionOutputWriter {
+  learning_session : bool
+  transaction_writer : bool
+  close() -> None
+  write(data: Union[Tuple, Transaction]) -> None
+}
+class "TrainingConfig" as primaite.config.training_config.TrainingConfig {
+  action_type
+  agent_framework
+  agent_identifier
+  agent_load_file : Optional[str]
+  all_ok : float
+  checkpoint_every_n_episodes : int
+  compromised : float
+  compromised_should_be_good : float
+  compromised_should_be_overwhelmed : float
+  compromised_should_be_patching : float
+  corrupt : float
+  corrupt_should_be_destroyed : float
+  corrupt_should_be_good : float
+  corrupt_should_be_repairing : float
+  corrupt_should_be_restoring : float
+  deep_learning_framework
+  destroyed : float
+  destroyed_should_be_corrupt : float
+  destroyed_should_be_good : float
+  destroyed_should_be_repairing : float
+  destroyed_should_be_restoring : float
+  deterministic : bool
+  file_system_repairing_limit : int
+  file_system_restoring_limit : int
+  file_system_scanning_limit : int
+  good_should_be_compromised : float
+  good_should_be_corrupt : float
+  good_should_be_destroyed : float
+  good_should_be_overwhelmed : float
+  good_should_be_patching : float
+  good_should_be_repairing : float
+  good_should_be_restoring : float
+  green_ier_blocked : float
+  hard_coded_agent_view
+  implicit_acl_rule
+  load_agent : bool
+  max_number_acl_rules : int
+  node_booting_duration : int
+  node_reset_duration : int
+  node_shutdown_duration : int
+  num_eval_episodes : int
+  num_eval_steps : int
+  num_train_episodes : int
+  num_train_steps : int
+  observation_space : dict
+  observation_space_high_value : int
+  off_should_be_on : float
+  off_should_be_resetting : float
+  on_should_be_off : float
+  on_should_be_resetting : float
+  os_patching_duration : int
+  overwhelmed : float
+  overwhelmed_should_be_compromised : float
+  overwhelmed_should_be_good : float
+  overwhelmed_should_be_patching : float
+  patching : float
+  patching_should_be_compromised : float
+  patching_should_be_good : float
+  patching_should_be_overwhelmed : float
+  random_red_agent : bool
+  red_ier_running : float
+  repairing : float
+  repairing_should_be_corrupt : float
+  repairing_should_be_destroyed : float
+  repairing_should_be_good : float
+  repairing_should_be_restoring : float
+  resetting : float
+  resetting_should_be_off : float
+  resetting_should_be_on : float
+  restoring : float
+  restoring_should_be_corrupt : float
+  restoring_should_be_destroyed : float
+  restoring_should_be_good : float
+  restoring_should_be_repairing : float
+  sb3_output_verbose_level
+  scanning : float
+  seed : Optional[int]
+  service_patching_duration : int
+  session_type
+  time_delay : int
+  from_dict(config_dict: Dict[str, Any]) -> TrainingConfig
+  to_dict(json_serializable: bool) -> Dict
+}
+class "Transaction" as primaite.transactions.transaction.Transaction {
+  action_space : Optional[int]
+  agent_identifier
+  episode_number : int
+  obs_space : str
+  obs_space_description : NoneType, Optional[List[str]], list
+  obs_space_post : Optional[Union['np.ndarray', Tuple['np.ndarray']]]
+  obs_space_pre : Optional[Union['np.ndarray', Tuple['np.ndarray']]]
+  reward : Optional[float], float
+  step_number : int
+  timestamp : datetime
+  as_csv_data() -> Tuple[List, List]
+}
+primaite.agents.hardcoded_abc.HardCodedAgentSessionABC --|> primaite.agents.agent_abc.AgentSessionABC
+primaite.agents.hardcoded_acl.HardCodedACLAgent --|> primaite.agents.hardcoded_abc.HardCodedAgentSessionABC
+primaite.agents.hardcoded_node.HardCodedNodeAgent --|> primaite.agents.hardcoded_abc.HardCodedAgentSessionABC
+primaite.agents.rllib.RLlibAgent --|> primaite.agents.agent_abc.AgentSessionABC
+primaite.agents.sb3.SB3Agent --|> primaite.agents.agent_abc.AgentSessionABC
+primaite.agents.simple.DoNothingACLAgent --|> primaite.agents.hardcoded_abc.HardCodedAgentSessionABC
+primaite.agents.simple.DoNothingNodeAgent --|> primaite.agents.hardcoded_abc.HardCodedAgentSessionABC
+primaite.agents.simple.DummyAgent --|> primaite.agents.hardcoded_abc.HardCodedAgentSessionABC
+primaite.agents.simple.RandomAgent --|> primaite.agents.hardcoded_abc.HardCodedAgentSessionABC
+primaite.environment.observations.AccessControlList_ --|> primaite.environment.observations.AbstractObservationComponent
+primaite.environment.observations.LinkTrafficLevels --|> primaite.environment.observations.AbstractObservationComponent
+primaite.environment.observations.NodeLinkTable --|> primaite.environment.observations.AbstractObservationComponent
+primaite.environment.observations.NodeStatuses --|> primaite.environment.observations.AbstractObservationComponent
+primaite.nodes.active_node.ActiveNode --|> primaite.nodes.node.Node
+primaite.nodes.passive_node.PassiveNode --|> primaite.nodes.node.Node
+primaite.nodes.service_node.ServiceNode --|> primaite.nodes.active_node.ActiveNode
+primaite.common.service.Service --|> primaite.nodes.service_node.ServiceNode
+primaite.acl.access_control_list.AccessControlList --* primaite.environment.primaite_env.Primaite : acl
+primaite.acl.acl_rule.ACLRule --* primaite.acl.access_control_list.AccessControlList : acl_implicit_rule
+primaite.agents.hardcoded_acl.HardCodedACLAgent --* primaite.primaite_session.PrimaiteSession : _agent_session
+primaite.agents.hardcoded_node.HardCodedNodeAgent --* primaite.primaite_session.PrimaiteSession : _agent_session
+primaite.agents.rllib.RLlibAgent --* primaite.primaite_session.PrimaiteSession : _agent_session
+primaite.agents.sb3.SB3Agent --* primaite.primaite_session.PrimaiteSession : _agent_session
+primaite.agents.simple.DoNothingACLAgent --* primaite.primaite_session.PrimaiteSession : _agent_session
+primaite.agents.simple.DoNothingNodeAgent --* primaite.primaite_session.PrimaiteSession : _agent_session
+primaite.agents.simple.DummyAgent --* primaite.primaite_session.PrimaiteSession : _agent_session
+primaite.agents.simple.RandomAgent --* primaite.primaite_session.PrimaiteSession : _agent_session
+primaite.config.training_config.TrainingConfig --* primaite.agents.agent_abc.AgentSessionABC : _training_config
+primaite.config.training_config.TrainingConfig --* primaite.environment.primaite_env.Primaite : training_config
+primaite.environment.observations.ObservationsHandler --* primaite.environment.primaite_env.Primaite : obs_handler
+primaite.environment.primaite_env.Primaite --* primaite.agents.agent_abc.AgentSessionABC : _env
+primaite.environment.primaite_env.Primaite --* primaite.agents.hardcoded_abc.HardCodedAgentSessionABC : _env
+primaite.environment.primaite_env.Primaite --* primaite.agents.sb3.SB3Agent : _env
+primaite.utils.session_output_writer.SessionOutputWriter --* primaite.environment.primaite_env.Primaite : episode_av_reward_writer
+primaite.utils.session_output_writer.SessionOutputWriter --* primaite.environment.primaite_env.Primaite : transaction_writer
+primaite.config.training_config.TrainingConfig --o primaite.nodes.node.Node : config_values
+primaite.nodes.node_state_instruction_green.NodeStateInstructionGreen --* primaite.environment.primaite_env.Primaite
+primaite.nodes.node_state_instruction_red.NodeStateInstructionRed --* primaite.environment.primaite_env.Primaite
+primaite.pol.ier.IER --* primaite.environment.primaite_env.Primaite
+primaite.common.protocol.Protocol --o primaite.links.link.Link
+primaite.links.link.Link --* primaite.environment.primaite_env.Primaite
+primaite.config.training_config.TrainingConfig --o primaite.nodes.active_node.ActiveNode
+primaite.utils.session_output_writer.SessionOutputWriter --> primaite.transactions.transaction.Transaction
+primaite.transactions.transaction.Transaction --> primaite.environment.primaite_env.Primaite
+@enduml
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -1,6 +1,4 @@
 # Minimal makefile for Sphinx documentation
-#
-
 # You can set these variables from the command line, and also
 # from the environment for the first two.
 SPHINXOPTS    ?=
@@ -8,13 +6,29 @@ SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = .
 BUILDDIR      = _build

+# Sphinx autosummary output; deleted by "clean" so stale stubs never linger.
+# Both the remove command and the path separator depend on the OS: Windows
+# sets the OS environment variable, anything else is treated as POSIX.
+ifdef OS
+   AUTOSUMMARY = "source\_autosummary"
+   RM = IF exist $(AUTOSUMMARY) (  RMDIR $(AUTOSUMMARY) /s /q )
+else
+   AUTOSUMMARY = source/_autosummary
+   RM = rm -rf $(AUTOSUMMARY)
+endif
+
 # Put it first so that "make" without argument is like "make help".
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

 .PHONY: help Makefile

+# Delete the generated autosummary directory using the OS-specific $(RM)
+# command. Declared phony so a file named "clean" can never suppress it.
+.PHONY: clean
+clean:
+	$(RM)
+
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
+# "clean" is listed after the "|", making it an order-only prerequisite: it is
+# run before the recipe but its timestamp never forces a rebuild.
+# The first recipe line has pip-licenses write the dependency list, in rst
+# format with URLs, to source/primaite-dependencies.rst before Sphinx runs.
+%: Makefile | clean
+	pip-licenses --format=rst --with-urls --output-file=source/primaite-dependencies.rst
+
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/docs/_static/.gitkeep
+++ b/docs/_static/.gitkeep
--- a/docs/_templates/custom-class-template.rst
+++ b/docs/_templates/custom-class-template.rst
@@ -0,0 +1,41 @@
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+..
+    Credit to https://github.com/JamesALeedham/Sphinx-Autosummary-Recursion for the custom templates.
+
+{{ fullname | escape | underline}}
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
+   :members:
+   :show-inheritance:
+   :inherited-members:
+   :special-members: __init__, __call__, __add__, __mul__
+
+   {% block methods %}
+   {% if methods %}
+   .. rubric:: {{ _('Methods') }}
+
+   .. autosummary::
+      :nosignatures:
+   {% for item in methods %}
+      {%- if not item.startswith('_') %}
+      ~{{ name }}.{{ item }}
+      {%- endif -%}
+   {%- endfor %}
+   {% endif %}
+   {% endblock %}
+
+   {% block attributes %}
+   {% if attributes %}
+   .. rubric:: {{ _('Attributes') }}
+
+   .. autosummary::
+   {% for item in attributes %}
+      ~{{ name }}.{{ item }}
+   {%- endfor %}
+   {% endif %}
+   {% endblock %}
--- a/docs/_templates/custom-module-template.rst
+++ b/docs/_templates/custom-module-template.rst
@@ -0,0 +1,73 @@
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+..
+    Credit to https://github.com/JamesALeedham/Sphinx-Autosummary-Recursion for the custom templates.
+
+{{ fullname | escape | underline}}
+
+.. automodule:: {{ fullname }}
+
+   {% block attributes %}
+   {% if attributes %}
+   .. rubric:: Module attributes
+
+   .. autosummary::
+      :toctree:
+   {% for item in attributes %}
+      {{ item }}
+   {%- endfor %}
+   {% endif %}
+   {% endblock %}
+
+   {% block functions %}
+   {% if functions %}
+   .. rubric:: {{ _('Functions') }}
+
+   .. autosummary::
+      :toctree:
+      :nosignatures:
+   {% for item in functions %}
+      {{ item }}
+   {%- endfor %}
+   {% endif %}
+   {% endblock %}
+
+   {% block classes %}
+   {% if classes %}
+   .. rubric:: {{ _('Classes') }}
+
+   .. autosummary::
+      :toctree:
+      :template: custom-class-template.rst
+      :nosignatures:
+   {% for item in classes %}
+      {{ item }}
+   {%- endfor %}
+   {% endif %}
+   {% endblock %}
+
+   {% block exceptions %}
+   {% if exceptions %}
+   .. rubric:: {{ _('Exceptions') }}
+
+   .. autosummary::
+      :toctree:
+   {% for item in exceptions %}
+      {{ item }}
+   {%- endfor %}
+   {% endif %}
+   {% endblock %}
+
+{% block modules %}
+{% if modules %}
+.. autosummary::
+   :toctree:
+   :template: custom-module-template.rst
+   :recursive:
+{% for item in modules %}
+   {{ item }}
+{%- endfor %}
+{% endif %}
+{% endblock %}
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -0,0 +1,20 @@
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+..
+   DO NOT DELETE THIS FILE! It contains the all-important `.. autosummary::` directive with `:recursive:` option, without
+   which API documentation wouldn't get extracted from docstrings by the `sphinx.ext.autosummary` engine. It is hidden
+   (not declared in any toctree) to remove an unnecessary intermediate page; index.rst instead points directly to the
+   package page. DO NOT REMOVE THIS FILE!
+
+    Credit to https://github.com/JamesALeedham/Sphinx-Autosummary-Recursion for the custom templates.
+..
+
+.. autosummary::
+   :toctree: source/_autosummary
+   :template: custom-module-template.rst
+   :recursive:
+
+   primaite
+   tests
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,3 +1,4 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 # Configuration file for the Sphinx documentation builder.
 #
 # For the full list of built-in configuration values, see the documentation:
@@ -17,9 +18,9 @@ sys.path.insert(0, os.path.abspath("../"))

 # -- Project information -----------------------------------------------------
 year = datetime.datetime.now().year
-project = "primaite"
-copyright = f"Copyright (C) QinetiQ Training and Simulation Ltd 2021 - {year}"
-author = "QinetiQ Training and Simulation Ltd"
+project = "PrimAITE"
+copyright = f"Copyright (C) Defence Science and Technology Laboratory UK 2021 - {year}"
+author = "Defence Science and Technology Laboratory UK"

 # The short Major.Minor.Build version
 with open("../src/primaite/VERSION", "r") as file:
@@ -27,10 +28,24 @@ with open("../src/primaite/VERSION", "r") as file:
 # The full version, including alpha/beta/rc tags
 release = version

+html_title = f"{project} v{release} docs"
+
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

-extensions = []
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    "sphinx.ext.autodoc",  # Core Sphinx library for auto html doc generation from docstrings
+    "sphinx.ext.autosummary",  # Create summary tables for modules/classes/methods etc
+    "sphinx.ext.intersphinx",  # Link to other projects' documentation (see mapping below)
+    "sphinx.ext.viewcode",  # Add a link to the Python source code for classes, functions etc.
+    "sphinx.ext.todo",  # Support for ".. todo::" directives
+    "sphinx_copybutton",  # Adds a copy button to code blocks
+    "sphinx_code_tabs",  # Enables tabbed code blocks
+]
+

 templates_path = ["_templates"]
 exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,7 +1,6 @@
-.. PrimAITE documentation master file, created by
-   sphinx-quickstart on Thu Dec  8 09:51:18 2022.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK

 Welcome to PrimAITE's documentation
 ====================================
@@ -9,15 +8,15 @@ Welcome to PrimAITE's documentation
 What is PrimAITE?
 ------------------------

-PrimAITE (Primary-level AI Training Environment) is a simulation environment for training AI under the ARCD programme. It incorporates the functionality required of a Primary-level environment, as specified in the Dstl ARCD Training Environment Matrix document:
+PrimAITE (Primary-level AI Training Environment) is a simulation environment for training AI under the ARCD programme. It incorporates the functionality required of a Primary-level environment, as specified in the Dstl ARCD Training Environment Matrix document:

 * The ability to model a relevant platform / system context;
 * The ability to model key characteristics of a platform / system by representing connections, IP addresses, ports, traffic loading, operating systems, file system, services and processes;
 * Operates at machine-speed to enable fast training cycles.

-PrimAITE aims to evolve into an ARCD environment that could be used as the follow-on from Reception level approaches (e.g. YAWNING TITAN), and help bridge the Sim-to-Real gap into Secondary level environments (e.g. IMAGINARY YAK).
+PrimAITE aims to evolve into an ARCD environment that could be used as the follow-on from Reception level approaches (e.g. YAWNING TITAN), and help bridge the Sim-to-Real gap into Secondary level environments (e.g. IMAGINARY YAK).

-This is similar to the approach taken by FVEY international partners (e.g. AUS CyBORG, US NSA FARLAND and CAN CyGil). These environments are referenced by the Dstl ARCD Agent Training Environments Knowledge Transfer document (TR141342).
+This is similar to the approach taken by FVEY international partners (e.g. AUS CyBORG, US NSA FARLAND and CAN CyGil). These environments are referenced by the Dstl ARCD Agent Training Environments Knowledge Transfer document (TR141342).

 What is PrimAITE built with
 --------------------------------------
@@ -25,18 +24,42 @@ What is PrimAITE built with
 * `OpenAI's Gym <https://gym.openai.com/>`_ is used as the basis for AI blue agent interaction with the PrimAITE environment
 * `Networkx <https://github.com/networkx/networkx>`_ is used as the underlying data structure used for the PrimAITE environment
 * `Stable Baselines 3 <https://github.com/DLR-RM/stable-baselines3>`_ is used as a default source of RL algorithms (although PrimAITE is not limited to SB3 agents)
+* `Ray RLlib <https://github.com/ray-project/ray>`_ is used as an additional source of RL algorithms
+* `Typer <https://github.com/tiangolo/typer>`_ is used for building CLIs (Command Line Interface applications)
+* `Jupyterlab <https://github.com/jupyterlab/jupyterlab>`_ is used as an extensible environment for interactive and reproducible computing, based on the Jupyter Notebook Architecture
+* `Platformdirs <https://github.com/platformdirs/platformdirs>`_ is used for finding the right location to store user data and configuration, which varies per platform
+* `Plotly <https://github.com/plotly/plotly.py>`_ is used for building high level charts
+

 Where next?
 ------------

-The best place to start is :ref:`about`
+Head over to the :ref:`getting-started` page to install and set up PrimAITE!

 .. toctree::
   :maxdepth: 8
   :caption: Contents:
+   :hidden:

+   source/getting_started
   source/about
-   source/dependencies
   source/config
-   source/session
-   source/results
+   source/primaite_session
+   source/custom_agent
+   PrimAITE API <source/_autosummary/primaite>
+   PrimAITE Tests <source/_autosummary/tests>
+   source/dependencies
+   source/glossary
+   source/migration_1.2_-_2.0
+
+
+.. TODO: Add project links once public repo has been created
+
+.. toctree::
+   :caption: Project Links:
+   :hidden:
+
+   Code <https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE>
+   Issues <https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/issues>
+   Pull Requests <https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/pulls>
+   Discussions <https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/discussions>
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -1,5 +1,7 @@
@ECHO OFF

+setlocal EnableDelayedExpansion
+
 pushd %~dp0

 REM Command file for Sphinx documentation
@@ -10,6 +12,8 @@ if "%SPHINXBUILD%" == "" (
 set SOURCEDIR=.
 set BUILDDIR=_build

+set AUTOSUMMARYDIR="%cd%\source\_autosummary\"
+
 %SPHINXBUILD% >NUL 2>NUL
 if errorlevel 9009 (
 	echo.
@@ -25,11 +29,30 @@ if errorlevel 9009 (

 if "%1" == "" goto help

+REM delete autosummary if it exists
+
+IF EXIST %AUTOSUMMARYDIR% (
+    echo deleting %AUTOSUMMARYDIR%
+    RMDIR %AUTOSUMMARYDIR% /s /q
+)
+
+REM write the dependency licenses to an rst file
+set LICENSEBUILD=pip-licenses --format=rst --with-urls
+set DEPS="%cd%\source\primaite-dependencies.rst"
+
+%LICENSEBUILD% --output-file=%DEPS%
+
 %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 goto end

 :help
 %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

+goto end
+
+:clean
+IF EXIST %AUTOSUMMARYDIR% echo deleting %AUTOSUMMARYDIR%
+IF EXIST %AUTOSUMMARYDIR% RMDIR %AUTOSUMMARYDIR% /s /q
+
 :end
 popd
--- a/docs/source/about.rst
+++ b/docs/source/about.rst
@@ -1,4 +1,8 @@
-.. _about:
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+.. _about:

 About PrimAITE
 ==============
@@ -8,51 +12,51 @@ Features

 PrimAITE provides the following features:

-* A flexible network / system laydown based on the Python networkx framework
-* Nodes and links (edges) host Python classes in order to present attributes and methods (and hence, a more representative model of a platform / system)
-* A ‘green agent’ Information Exchange Requirement (IER) function allows the representation of traffic (protocols and loading) on any / all links. Application of IERs is based on the status of node operating systems and services
-* A ‘green agent’ node Pattern-of-Life (PoL) function allows the representation of core behaviours on nodes (e.g. Operating state, Operating System state, Service state, File System state)
-* An Access Control List (ACL) function, mimicking the behaviour of a network firewall, is applied across the model, following standard ACL rule format (e.g. DENY/ALLOW, source IP, destination IP, protocol and port). Application of IERs adheres to any ACL restrictions
-* Presents an OpenAI Gym interface to the environment, allowing integration with any OpenAI Gym compliant defensive agents 
-* Red agent activity based on ‘red’ IERs and ‘red’ PoL
-* Defined reward function for use with RL agents (based on nodes status, and green / red IER success)
-* Fully configurable (network / system laydown, IERs, node PoL, ACL, episode step period, episode max steps) and repeatable to suit the training requirements of agents. Therefore, not bound to a representation of any particular platform, system or technology
-* Full capture of discrete metrics relating to agent training (full system state, agent actions taken, average reward)
-* Networkx provides laydown visualisation capability 
+* A flexible network / system laydown based on the Python networkx framework
+* Nodes and links (edges) host Python classes in order to present attributes and methods (and hence, a more representative model of a platform / system)
+* A 'green agent' Information Exchange Requirement (IER) function allows the representation of traffic (protocols and loading) on any / all links. Application of IERs is based on the status of node operating systems and services
+* A 'green agent' node Pattern-of-Life (PoL) function allows the representation of core behaviours on nodes (e.g. changing the Hardware state, Software State, Service state, or File System state)
+* An Access Control List (ACL) function, mimicking the behaviour of a network firewall, is applied across the model, following standard ACL rule format (e.g. DENY/ALLOW, source IP, destination IP, protocol and port). Application of IERs adheres to any ACL restrictions
+* Presents an OpenAI Gym interface to the environment, allowing integration with any OpenAI Gym compliant defensive agents
+* Red agent activity based on 'red' IERs and 'red' PoL
+* Defined reward function for use with RL agents (based on nodes status, and green / red IER success)
+* Fully configurable (network / system laydown, IERs, node PoL, ACL, episode step period, episode max steps) and repeatable to suit the training requirements of agents. Therefore, not bound to a representation of any particular platform, system or technology
+* Full capture of discrete metrics relating to agent training (full system state, agent actions taken, average reward)
+* Networkx provides laydown visualisation capability

 Architecture - Nodes and Links
 ******************************

 **Nodes**

-An inheritance model has been adopted in order to model nodes. All nodes have the following base attributes (Class: Node):
+An inheritance model has been adopted in order to model nodes. All nodes have the following base attributes (Class: Node):

-* ID
+* ID
 * Name
-* Type (e.g. computer, switch, RTU - enumeration)
-* Priority (P1, P2, P3, P4 or P5 - enumeration)
-* Operating State (ON, OFF, RESETTING - enumeration)
+* Type (e.g. computer, switch, RTU - enumeration)
+* Priority (P1, P2, P3, P4 or P5 - enumeration)
+* Hardware State (ON, OFF, RESETTING, SHUTTING_DOWN, BOOTING - enumeration)

-Active Nodes also have the following attributes (Class: Active Node):
+Active Nodes also have the following attributes (Class: Active Node):

-* IP Address
-* Operating System State (GOOD, PATCHING, COMPROMISED - enumeration)
+* IP Address
+* Software State (GOOD, PATCHING, COMPROMISED - enumeration)
 * File System State (GOOD, CORRUPT, DESTROYED, REPAIRING, RESTORING - enumeration)

-Service Nodes also have the following attributes (Class: Service Node):
+Service Nodes also have the following attributes (Class: Service Node):

-* List of Services (where service is composed of service name and port). There is no theoretical limit on the number of services that can be modelled. Services and protocols are currently intrinsically linked (i.e. a service is an application on a node transmitting traffic of this protocol type)
+* List of Services (where service is composed of service name and port). There is no theoretical limit on the number of services that can be modelled. Services and protocols are currently intrinsically linked (i.e. a service is an application on a node transmitting traffic of this protocol type)
 * Service state (GOOD, PATCHING, COMPROMISED, OVERWHELMED - enumeration)

 Passive Nodes are currently not used (but may be employed for non IP-based components such as machinery actuators in future releases).

 **Links**

-Links are modelled both as network edges (networkx) and as Python classes, in order to extend their functionality. Links include the following attributes:
+Links are modelled both as network edges (networkx) and as Python classes, in order to extend their functionality. Links include the following attributes:

-* ID
+* ID
 * Name
-* Bandwidth (bits/s)
+* Bandwidth (bits/s)
 * Source node ID
 * Destination node ID
 * Protocol list (containing the loading of protocols currently running on the link)
@@ -62,32 +66,32 @@ When the simulation runs, IERs are applied to the links in order to model traffi
 Information Exchange Requirements (IERs)
 ****************************************

-PrimAITE adopts the concept of Information Exchange Requirements (IERs) to model both green agent (background) and red agent (adversary) behaviour. IERs are used to initiate modelling of traffic loading on the network, and have the following attributes:
+PrimAITE adopts the concept of Information Exchange Requirements (IERs) to model both green agent (background) and red agent (adversary) behaviour. IERs are used to initiate modelling of traffic loading on the network, and have the following attributes:

-* ID
-* Start step (i.e. which step in the training episode should the IER start)
-* End step (i.e. which step in the training episode should the IER end)
+* ID
+* Start step (i.e. which step in the training episode should the IER start)
+* End step (i.e. which step in the training episode should the IER end)
 * Source node ID
-* Destination node ID
-* Load (bits/s)
-* Protocol
-* Port
+* Destination node ID
+* Load (bits/s)
+* Protocol
+* Port
 * Running status (i.e. on / off)

-The application of green agent IERs between a source and destination follows a number of rules. Specifically:
+The application of green agent IERs between a source and destination follows a number of rules. Specifically:

-1. Does the current simulation time step fall between IER start and end step
-2. Is the source node operational (both physically and at an O/S level), and is the service (protocol / port) associated with the IER (a) present on this node, and (b) in an operational state (i.e. not PATCHING)
-3. Is the destination node operational (both physically and at an O/S level), and is the service (protocol / port) associated with the IER (a) present on this node, and (b) in an operational state (i.e. not PATCHING)
-4. Are there any Access Control List rules in place that prevent the application of this IER
-5. Are all switches in the (OSPF) path between source and destination operational (both physically and at an O/S level)
+1. Does the current simulation time step fall between IER start and end step
+2. Is the source node operational (both physically and at an O/S level), and is the service (protocol / port) associated with the IER (a) present on this node, and (b) in an operational state (i.e. not PATCHING)
+3. Is the destination node operational (both physically and at an O/S level), and is the service (protocol / port) associated with the IER (a) present on this node, and (b) in an operational state (i.e. not PATCHING)
+4. Are there any Access Control List rules in place that prevent the application of this IER
+5. Are all switches in the (OSPF) path between source and destination operational (both physically and at an O/S level)

-For red agent IERs, the application of IERs between a source and destination follows a number of subtly different rules. Specifically:
+For red agent IERs, the application of IERs between a source and destination follows a number of subtly different rules. Specifically:

-1. Does the current simulation time step fall between IER start and end step
-2. Is the source node operational, and is the service (protocol / port) associated with the IER (a) present on that node and (b) already in a compromised state
-3. Is the destination node operational, and is the service (protocol / port) associated with the IER present on that node
-4. Are there any Access Control List rules in place that prevent the application of this IER
+1. Does the current simulation time step fall between IER start and end step
+2. Is the source node operational, and is the service (protocol / port) associated with the IER (a) present on that node and (b) already in a compromised state
+3. Is the destination node operational, and is the service (protocol / port) associated with the IER present on that node
+4. Are there any Access Control List rules in place that prevent the application of this IER
 5. Are all switches in the (OSPF) path between source and destination operational (both physically and at an O/S level)

 Assuming the rules pass, the IER is applied to all relevant links (based on use of OSPF) between source and destination.
@@ -101,15 +105,17 @@ The status changes that can be made to a node are as follows:

 * All Nodes:

-   * Operating State:
+   * Hardware State:

      * ON
      * OFF
      * RESETTING - when a status of resetting is entered, the node will automatically exit this state after a number of steps (as defined by the nodeResetDuration configuration item) after which it returns to an ON state
+      * BOOTING
+      * SHUTTING_DOWN

 * Active Nodes and Service Nodes:

-   * Operating System State:
+   * Software State:

      * GOOD
      * PATCHING - when a status of patching is entered, the node will automatically exit this state after a number of steps (as defined by the osPatchingDuration configuration item) after which it returns to a GOOD state
@@ -149,7 +155,7 @@ Red agent pattern-of-life has an additional feature not found in the green patte
 Access Control List modelling
 *****************************

-An Access Control List (ACL) is modelled to provide the means to manage traffic flows in the system. This will allow defensive agents the means to turn on / off rules, or potentially create new rules, to counter an attack.
+An Access Control List (ACL) is modelled to provide the means to manage traffic flows in the system. This will allow defensive agents the means to turn on / off rules, or potentially create new rules, to counter an attack.

 The ACL follows a standard network firewall format. For example:

@@ -182,11 +188,13 @@ All ACL rules are considered when applying an IER. Logic follows the order of ru

 Observation Spaces
 ******************
+The observation space provides the blue agent with information about the current status of nodes and links.

-The OpenAI Gym observation space provides the status of all nodes and links across the whole system:
+PrimAITE builds on top of Gym Spaces to create an observation space that is easily configurable for users. It's made up of components which are managed by the :py:class:`primaite.environment.observations.ObservationsHandler`. Each training scenario can define its own observation space, and the user can choose which information to include, and how it should be formatted.

-* Nodes (in terms of operating state, operating system state, file system state and services state) 
-* Links (in terms of current loading for each service/protocol)
+NodeLinkTable component
+-----------------------
+For example, the :py:class:`primaite.environment.observations.NodeLinkTable` component represents the status of nodes and links as a ``gym.spaces.Box`` with an example format shown below:

 An example observation space is provided below:

@@ -196,8 +204,8 @@ An example observation space is provided below:

   * -
     - ID
-     - Operating State
-     - O/S State
+     - Hardware State
+     - Software State
     - File System State
     - Service / Protocol A
     - Service / Protocol B
@@ -244,46 +252,97 @@ An example observation space is provided below:
     - 5000
     - 0

-The observation space is a 6 x 6 Box type (OpenAI Gym Space) in this example. This is made up from the node and link information detailed below.
-
 For the nodes, the following values are represented:

- * ID
- * Operating State:
+.. code-block::

-    * 1 = ON
-    * 2 = OFF
-    * 3 = RESETTING
-
- * O/S State:
-
-    * 1 = GOOD
-    * 2 = PATCHING
-    * 3 = COMPROMISED
-
- * Service State:
-
-    * 1 = GOOD
-    * 2 = PATCHING
-    * 3 = COMPROMISED
-    * 4 = OVERWHELMED
-
- * File System State:
-
-    * 1 = GOOD
-    * 2 = CORRUPT
-    * 3 = DESTROYED
-    * 4 = REPAIRING
-    * 5 = RESTORING
+  [
+    ID
+    Hardware State            (1=ON,   2=OFF,  3=RESETTING,  4=SHUTTING_DOWN, 5=BOOTING)
+    Software State            (0=none, 1=GOOD, 2=PATCHING,   3=COMPROMISED)
+    File System State         (0=none, 1=GOOD, 2=CORRUPT,    3=DESTROYED,  4=REPAIRING, 5=RESTORING)
+    Service1/Protocol1 state  (0=none, 1=GOOD, 2=PATCHING,   3=COMPROMISED)
+    Service2/Protocol2 state  (0=none, 1=GOOD, 2=PATCHING,   3=COMPROMISED)
+  ]

 (Note that each service available in the network is provided as a column, although not all nodes may utilise all services)

 For the links, the following statuses are represented:

- * ID
- * Operating State = N/A
- * O/S State = N/A
- * Protocol = loading in bits/s
+.. code-block::
+
+  [
+    ID
+    Hardware State            (0=not applicable)
+    Software State            (0=not applicable)
+    File System State         (0=not applicable)
+    Service1/Protocol1 state  (Traffic load from this protocol on this link)
+    Service2/Protocol2 state  (Traffic load from this protocol on this link)
+  ]
+
+NodeStatus component
+----------------------
+This is a MultiDiscrete observation space that can be thought of as a one-dimensional vector of discrete states.
+The example above would have the following structure:
+
+.. code-block::
+
+  [
+    node1_info
+    node2_info
+    node3_info
+  ]
+
+Each ``node_info`` contains the following:
+
+.. code-block::
+
+  [
+    hardware_state    (0=none, 1=ON,   2=OFF,      3=RESETTING, 4=SHUTTING_DOWN, 5=BOOTING)
+    software_state    (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED)
+    file_system_state (0=none, 1=GOOD, 2=CORRUPT,  3=DESTROYED, 4=REPAIRING, 5=RESTORING)
+    service1_state    (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED)
+    service2_state    (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED)
+  ]
+
+In a network with three nodes and two services, the full observation space would have 15 elements. It can be written with ``gym`` notation to indicate the number of discrete options for each of the elements of the observation space. For example:
+
+.. code-block::
+
+  gym.spaces.MultiDiscrete([6,4,6,4,4,6,4,6,4,4,6,4,6,4,4])
+
+.. note::
+  NodeStatus observation component provides information only about nodes. Links are not considered.
+
+LinkTrafficLevels
+-----------------
+This component is a MultiDiscrete space showing the traffic flow levels on the links in the network, after applying a threshold to convert it from a continuous to a discrete value. There are two configurable parameters:
+
+* ``quantisation_levels`` determines how many discrete bins to use for converting the continuous traffic value to discrete (default is 5).
+* ``combine_service_traffic`` determines whether to separately output traffic use for each network protocol or whether to combine them into an overall value for the link (default is ``True``).
+
+For example, with default parameters and a network with three links, the structure of this component would be:
+
+.. code-block::
+
+  [
+    link1_status
+    link2_status
+    link3_status
+  ]
+
+Each ``link_status`` is a number from 0-4 representing the network load in relation to bandwidth.
+
+.. code-block::
+
+  0 = No traffic (0%)
+  1 = low traffic (1%-33%)
+  2 = medium traffic (33%-66%)
+  3 = high traffic (66%-99%)
+  4 = max traffic/ overwhelmed (100%)
+
+Using ``gym`` notation, the shape of the obs space is: ``gym.spaces.MultiDiscrete([5,5,5])``.
+

 Action Spaces
 **************
@@ -292,29 +351,40 @@ The action space available to the blue agent comes in two types:

 1. Node-based
 2. Access Control List
+3. Any (Agent can take both node-based and ACL-based actions)

 The choice of action space used during a training session is determined in the config_[name].yaml file.

 **Node-Based**

-The agent is able to influence the status of nodes by switching them off, resetting, or patching operating systems and services. In this instance, the action space is an OpenAI Gym multidiscrete type, as follows:
+The agent is able to influence the status of nodes by switching them off, resetting, or patching operating systems and services. In this instance, the action space is an OpenAI Gym spaces.Discrete type, as follows:

- * [0, num nodes] - Node ID (0 = nothing, node ID)
- * [0, 4] - What property it's acting on (0 = nothing, 1 = state, 2 = O/S state, 3 = service state, 4 = file system state)
- * [0, 3] - Action on property (0 = nothing, 1 = on / scan, 2 = off / repair, 3 = reset / patch / restore)
- * [0, num services] - Resolves to service ID (0 = nothing, resolves to service)
+ * Dictionary item {... ,1: [x1, x2, x3, x4] ...}
+   The placeholders inside the list under the key '1' mean the following:
+
+    * [0, num nodes] - Node ID (0 = nothing, node ID)
+    * [0, 4] - What property it's acting on (0 = nothing, 1 = state, 2 = SoftwareState, 3 = service state, 4 = file system state)
+    * [0, 3] - Action on property (0 = nothing, 1 = on / scan, 2 = off / repair, 3 = reset / patch / restore)
+    * [0, num services] - Resolves to service ID (0 = nothing, resolves to service)

 **Access Control List**

-The blue agent is able to influence the configuration of the Access Control List rule set (which implements a system-wide firewall). In this instance, the action space is an OpenAI multidiscrete type, as follows:
+The blue agent is able to influence the configuration of the Access Control List rule set (which implements a system-wide firewall). In this instance, the action space is an OpenAI Gym spaces.Discrete type, as follows:

+   * Dictionary item {... ,1: [x1, x2, x3, x4, x5, x6] ...}
+     The placeholders inside the list under the key '1' mean the following:

- * [0, 2] - Action (0 = do nothing, 1 = create rule, 2 = delete rule)
- * [0, 1] - Permission (0 = DENY, 1 = ALLOW)
- * [0, num nodes] - Source IP (0 = any, then 1 -> x resolving to IP addresses)
- * [0, num nodes] - Dest IP (0 = any, then 1 -> x resolving to IP addresses)
- * [0, num services] - Protocol (0 = any, then 1 -> x resolving to protocol)
- * [0, num ports] - Port (0 = any, then 1 -> x resolving to port)
+     * [0, 2] - Action (0 = do nothing, 1 = create rule, 2 = delete rule)
+     * [0, 1] - Permission (0 = DENY, 1 = ALLOW)
+     * [0, num nodes] - Source IP (0 = any, then 1 -> x resolving to IP addresses)
+     * [0, num nodes] - Dest IP (0 = any, then 1 -> x resolving to IP addresses)
+     * [0, num services] - Protocol (0 = any, then 1 -> x resolving to protocol)
+     * [0, num ports] - Port (0 = any, then 1 -> x resolving to port)
+
+**ANY**
+
+The agent is able to carry out both **Node-Based** and **Access Control List** operations.
+This means the dictionary will contain key-value pairs in the format of BOTH Node-Based and Access Control List as seen above.

 Rewards
 *******
@@ -342,4 +412,3 @@ The PrimAITE project has an ambition to include the following enhancements in fu

 * Integration with a suitable standardised framework to allow multi-agent integration
 * Integration with external threat emulation tools, either using off-line data, or integrating at runtime
-* Provision of data such that agents can construct alternative observation spaces (as an alternative to the default PrimAITE observation space)
--- a/docs/source/config.rst
+++ b/docs/source/config.rst
@@ -1,3 +1,7 @@
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
 .. _config:

 The Config Files Explained
@@ -5,323 +9,410 @@ The Config Files Explained

 PrimAITE uses two configuration files for its operation:

-* config_main.yaml - used to define the top-level settings of the PrimAITE environment, and the session that is to be run.
-* config_[name].yaml - used to define the low-level settings of a session, including the network laydown, green / red agent information exchange requirements (IERSs), Access Control Rules, Action Space type, and the number of steps in each episode.
+* **The Training Config**

-config_main.yaml:
-*****************
+    Used to define the top-level settings of the PrimAITE environment, the reward values, and the session that is to be run.

-The config_main.yaml file consists of the following attributes:
+* **The Lay Down Config**
+
+    Used to define the low-level settings of a session, including the network laydown, green / red agent information exchange requirements (IERs) and Access Control Rules.
+
+Training Config:
+*******************
+
+The Training Config file consists of the following attributes:

 **Generic Config Values**

-* **agentIdentifier** [enum]

-   This identifies the agent to use for the session. Select from one of the following:
+* **agent_framework** [enum]

-   * GENERIC - Where a user developed agent is to be used
-   * STABLE_BASELINES3_PPO - Use a SB3 PPO agent
-   * STABLE_BASELINES3_A2C - use a SB3 A2C agent
+    This identifies the agent framework to be used to instantiate the agent algorithm. Select from one of the following:

-* **numEpisodes** [int]
+    * NONE - Where a user developed agent is to be used
+    * SB3 - Stable Baselines3
+    * RLLIB - Ray RLlib.

-   This defines the number of episodes that the agent will train or be evaluated over. Each episode consists of a number of steps (with step number defined in the config_[name].yaml file)
+* **agent_identifier** [enum]

-* **timeDelay** [int]
+    This identifies the agent to use for the session. Select from one of the following:

-   The time delay (in milliseconds) to take between each step when running a GENERIC agent session
+    * A2C - Advantage Actor Critic
+    * PPO - Proximal Policy Optimization
+    * HARDCODED - A custom built deterministic agent
+    * RANDOM - A Stochastic random agent

-* **configFilename** [filename]

-   The name of the config_[name].yaml file to use for this session
+* **random_red_agent** [bool]

-* **sessionType** [text]
+    Determines if the session should be run with a random red agent

-   Type of session to be run (TRAINING or EVALUATION)
+* **action_type** [enum]

-* **loadAgent** [bool]
+    Determines whether a NODE, ACL, or ANY (combined NODE & ACL) action space format is adopted for the session

-   Determine whether to load an agent from file

-* **agentLoadFile** [text]
+* **OBSERVATION_SPACE** [dict]

-   File path and file name of agent if you're loading one in
+    Allows the user to configure the observation space by combining one or more observation components. The list of
+    available components is in :py:mod:`primaite.environment.observations`.

-* **observationSpaceHighValue** [int]
+    The observation space config item should have a ``components`` key which is a list of components. Each component
+    config must have a ``name`` key, and can optionally have an ``options`` key. The ``options`` are passed to the
+    component while it is being initialised.

-   The high value to use for values in the observation space. This is set to 1000000000 by default, and should not need changing in most cases
+    This example illustrates the correct format for the observation space config item
+
+    .. code-block:: yaml
+
+        observation_space:
+          components:
+            - name: NODE_LINK_TABLE
+            - name: NODE_STATUSES
+            - name: LINK_TRAFFIC_LEVELS
+              options:
+                combine_service_traffic: false
+                quantisation_levels: 99
+            - name: ACCESS_CONTROL_LIST
+
+
+    Currently available components are:
+
+      * :py:mod:`NODE_LINK_TABLE<primaite.environment.observations.NodeLinkTable>`, this does not accept any additional options
+      * :py:mod:`NODE_STATUSES<primaite.environment.observations.NodeStatuses>`, this does not accept any additional options
+      * :py:mod:`ACCESS_CONTROL_LIST<primaite.environment.observations.AccessControlList>`, this does not accept additional options
+      * :py:mod:`LINK_TRAFFIC_LEVELS<primaite.environment.observations.LinkTrafficLevels>`, this accepts the following options:
+
+        * ``combine_service_traffic`` - whether to consider bandwidth use separately for each network protocol or combine them into a single bandwidth reading (boolean)
+        * ``quantisation_levels`` - how many discrete bandwidth usage levels to use for encoding. This can be an integer equal to or greater than 3.
+
+    The other configurable item is ``flatten`` which is false by default. When set to true, the observation space is flattened (turned into a 1-D vector). You should use this if your RL agent does not natively support observation space types like ``gym.spaces.Tuple``.
+
+* **num_train_episodes** [int]
+
+    This defines the number of episodes that the agent will train for.
+
+
+* **num_train_steps** [int]
+
+    Determines the number of steps to run in each episode of the training session.
+
+
+* **num_eval_episodes** [int]
+
+    This defines the number of episodes that the agent will be evaluated over.
+
+
+* **num_eval_steps** [int]
+
+    Determines the number of steps to run in each episode of the evaluation session.
+
+
+* **time_delay** [int]
+
+    The time delay (in milliseconds) to take between each step when running a GENERIC agent session
+
+
+* **session_type** [text]
+
+    Type of session to be run (TRAINING, EVALUATION, or BOTH)
+
+* **load_agent** [bool]
+
+    Determine whether to load an agent from file
+
+* **agent_load_file** [text]
+
+    File path and file name of agent if you're loading one in
+
+* **observation_space_high_value** [int]
+
+    The high value to use for values in the observation space. This is set to 1000000000 by default, and should not need changing in most cases
+
+* **implicit_acl_rule** [str]
+
+    Determines which implicit (default) rule the ACL has - the two options are: DENY or ALLOW.
+
+* **max_number_acl_rules** [int]
+
+    Sets a limit on how many ACL rules there can be in the ACL list throughout the training session.

 **Reward-Based Config Values**

-* **Generic [allOk]** [int]
+Rewards are calculated based on the difference between the current state and reference state (the 'should be' state) of the environment.

-   The score to give when the current situation (for a given component) is no different from that expected in the baseline (i.e. as though no blue or red agent actions had been undertaken)
+* **Generic [all_ok]** [float]

-* **Node Operating State [offShouldBeOn]** [int]
+    The score to give when the current situation (for a given component) is no different from that expected in the baseline (i.e. as though no blue or red agent actions had been undertaken)

-   The score to give when the node should be on, but is off
+* **Node Hardware State [off_should_be_on]** [float]

-* **Node Operating State [offShouldBeResetting]** [int]
+    The score to give when the node should be on, but is off

-   The score to give when the node should be resetting, but is off
+* **Node Hardware State [off_should_be_resetting]** [float]

-* **Node Operating State [onShouldBeOff]** [int]
+    The score to give when the node should be resetting, but is off

-   The score to give when the node should be off, but is on
+* **Node Hardware State [on_should_be_off]** [float]

-* **Node Operating State [onShouldBeResetting]** [int]
+    The score to give when the node should be off, but is on

-   The score to give when the node should be resetting, but is on
+* **Node Hardware State [on_should_be_resetting]** [float]

-* **Node Operating State [resettingShouldBeOn]** [int]
+    The score to give when the node should be resetting, but is on

-   The score to give when the node should be on, but is resetting
+* **Node Hardware State [resetting_should_be_on]** [float]

-* **Node Operating State [resettingShouldBeOff]** [int]
+    The score to give when the node should be on, but is resetting

-   The score to give when the node should be off, but is resetting
+* **Node Hardware State [resetting_should_be_off]** [float]

-* **Node Operating State [resetting]** [int]
+    The score to give when the node should be off, but is resetting

-   The score to give when the node is resetting
+* **Node Hardware State [resetting]** [float]

-* **Node Operating System or Service State [goodShouldBePatching]** [int]
+    The score to give when the node is resetting

-   The score to give when the state should be patching, but is good
+* **Node Operating System or Service State [good_should_be_patching]** [float]

-* **Node Operating System or Service State [goodShouldBeCompromised]** [int]
+    The score to give when the state should be patching, but is good

-   The score to give when the state should be compromised, but is good
+* **Node Operating System or Service State [good_should_be_compromised]** [float]

-* **Node Operating System or Service State [goodShouldBeOverwhelmed]** [int]
+    The score to give when the state should be compromised, but is good

-   The score to give when the state should be overwhelmed, but is good
+* **Node Operating System or Service State [good_should_be_overwhelmed]** [float]

-* **Node Operating System or Service State [patchingShouldBeGood]** [int]
+    The score to give when the state should be overwhelmed, but is good

-   The score to give when the state should be good, but is patching
+* **Node Operating System or Service State [patching_should_be_good]** [float]

-* **Node Operating System or Service State [patchingShouldBeCompromised]** [int]
+    The score to give when the state should be good, but is patching

-   The score to give when the state should be compromised, but is patching
+* **Node Operating System or Service State [patching_should_be_compromised]** [float]

-* **Node Operating System or Service State [patchingShouldBeOverwhelmed]** [int]
+    The score to give when the state should be compromised, but is patching

-   The score to give when the state should be overwhelmed, but is patching
+* **Node Operating System or Service State [patching_should_be_overwhelmed]** [float]

-* **Node Operating System or Service State [patching]** [int]
+    The score to give when the state should be overwhelmed, but is patching

-   The score to give when the state is patching
+* **Node Operating System or Service State [patching]** [float]

-* **Node Operating System or Service State [compromisedShouldBeGood]** [int]
+    The score to give when the state is patching

-   The score to give when the state should be good, but is compromised
+* **Node Operating System or Service State [compromised_should_be_good]** [float]

-* **Node Operating System or Service State [compromisedShouldBePatching]** [int]
+    The score to give when the state should be good, but is compromised

-   The score to give when the state should be patching, but is compromised
+* **Node Operating System or Service State [compromised_should_be_patching]** [float]

-* **Node Operating System or Service State [compromisedShouldBeOverwhelmed]** [int]
+    The score to give when the state should be patching, but is compromised

-   The score to give when the state should be overwhelmed, but is compromised
+* **Node Operating System or Service State [compromised_should_be_overwhelmed]** [float]

-* **Node Operating System or Service State [compromised]** [int]
+    The score to give when the state should be overwhelmed, but is compromised

-   The score to give when the state is compromised
+* **Node Operating System or Service State [compromised]** [float]

-* **Node Operating System or Service State [overwhelmedShouldBeGood]** [int]
+    The score to give when the state is compromised

-   The score to give when the state should be good, but is overwhelmed
+* **Node Operating System or Service State [overwhelmed_should_be_good]** [float]

-* **Node Operating System or Service State [overwhelmedShouldBePatching]** [int]
+    The score to give when the state should be good, but is overwhelmed

-   The score to give when the state should be patching, but is overwhelmed
+* **Node Operating System or Service State [overwhelmed_should_be_patching]** [float]

-* **Node Operating System or Service State [overwhelmedShouldBeCompromised]** [int]
+    The score to give when the state should be patching, but is overwhelmed

-   The score to give when the state should be compromised, but is overwhelmed
+* **Node Operating System or Service State [overwhelmed_should_be_compromised]** [float]

-* **Node Operating System or Service State [overwhelmed]** [int]
+    The score to give when the state should be compromised, but is overwhelmed

-   The score to give when the state is overwhelmed
+* **Node Operating System or Service State [overwhelmed]** [float]

-* **Node File System State [goodShouldBeRepairing]** [int]
+    The score to give when the state is overwhelmed
+
+* **Node File System State [good_should_be_repairing]** [float]

    The score to give when the state should be repairing, but is good

-* **Node File System State [goodShouldBeRestoring]** [int]
+* **Node File System State [good_should_be_restoring]** [float]

    The score to give when the state should be restoring, but is good

-* **Node File System State [goodShouldBeCorrupt]** [int]
+* **Node File System State [good_should_be_corrupt]** [float]

    The score to give when the state should be corrupt, but is good

-* **Node File System State [goodShouldBeDestroyed]** [int]
+* **Node File System State [good_should_be_destroyed]** [float]

    The score to give when the state should be destroyed, but is good

-* **Node File System State [repairingShouldBeGood]** [int]
+* **Node File System State [repairing_should_be_good]** [float]

    The score to give when the state should be good, but is repairing

-* **Node File System State [repairingShouldBeRestoring]** [int]
+* **Node File System State [repairing_should_be_restoring]** [float]

    The score to give when the state should be restoring, but is repairing

-* **Node File System State [repairingShouldBeCorrupt]** [int]
+* **Node File System State [repairing_should_be_corrupt]** [float]

    The score to give when the state should be corrupt, but is repairing

-* **Node File System State [repairingShouldBeDestroyed]** [int]
+* **Node File System State [repairing_should_be_destroyed]** [float]

    The score to give when the state should be destroyed, but is repairing

-* **Node File System State [repairing]** [int]
+* **Node File System State [repairing]** [float]

    The score to give when the state is repairing

-* **Node File System State [restoringShouldBeGood]** [int]
+* **Node File System State [restoring_should_be_good]** [float]

    The score to give when the state should be good, but is restoring

-* **Node File System State [restoringShouldBeRepairing]** [int]
+* **Node File System State [restoring_should_be_repairing]** [float]

    The score to give when the state should be repairing, but is restoring

-* **Node File System State [restoringShouldBeCorrupt]** [int]
+* **Node File System State [restoring_should_be_corrupt]** [float]

    The score to give when the state should be corrupt, but is restoring

-* **Node File System State [restoringShouldBeDestroyed]** [int]
+* **Node File System State [restoring_should_be_destroyed]** [float]

    The score to give when the state should be destroyed, but is restoring

-* **Node File System State [restoring]** [int]
+* **Node File System State [restoring]** [float]

    The score to give when the state is restoring

-* **Node File System State [corruptShouldBeGood]** [int]
+* **Node File System State [corrupt_should_be_good]** [float]

    The score to give when the state should be good, but is corrupt

-* **Node File System State [corruptShouldBeRepairing]** [int]
+* **Node File System State [corrupt_should_be_repairing]** [float]

    The score to give when the state should be repairing, but is corrupt

-* **Node File System State [corruptShouldBeRestoring]** [int]
+* **Node File System State [corrupt_should_be_restoring]** [float]

    The score to give when the state should be restoring, but is corrupt

-* **Node File System State [corruptShouldBeDestroyed]** [int]
+* **Node File System State [corrupt_should_be_destroyed]** [float]

    The score to give when the state should be destroyed, but is corrupt

-* **Node File System State [corrupt]** [int]
+* **Node File System State [corrupt]** [float]

    The score to give when the state is corrupt

-* **Node File System State [destroyedShouldBeGood]** [int]
+* **Node File System State [destroyed_should_be_good]** [float]

    The score to give when the state should be good, but is destroyed

-* **Node File System State [destroyedShouldBeRepairing]** [int]
+* **Node File System State [destroyed_should_be_repairing]** [float]

    The score to give when the state should be repairing, but is destroyed

-* **Node File System State [destroyedShouldBeRestoring]** [int]
+* **Node File System State [destroyed_should_be_restoring]** [float]

    The score to give when the state should be restoring, but is destroyed

-* **Node File System State [destroyedShouldBeCorrupt]** [int]
+* **Node File System State [destroyed_should_be_corrupt]** [float]

    The score to give when the state should be corrupt, but is destroyed

-* **Node File System State [destroyed]** [int]
+* **Node File System State [destroyed]** [float]

    The score to give when the state is destroyed

-* **Node File System State [scanning]** [int]
+* **Node File System State [scanning]** [float]

    The score to give when the state is scanning

-* **IER Status [redIerRunning]** [int]
+* **IER Status [red_ier_running]** [float]

-   The score to give when a red agent IER is permitted to run
+    The score to give when a red agent IER is permitted to run

-* **IER Status [greenIerBlocked]** [int]
+* **IER Status [green_ier_blocked]** [float]

-   The score to give when a green agent IER is prevented from running
+    The score to give when a green agent IER is prevented from running

 **Patching / Reset Durations**

-* **osPatchingDuration** [int]
+* **os_patching_duration** [int]

-   The number of steps to take when patching an Operating System
+    The number of steps to take when patching an Operating System

-* **nodeResetDuration** [int]
+* **node_reset_duration** [int]

-   The number of steps to take when resetting a node's operating state
+    The number of steps to take when resetting a node's hardware state

-* **servicePatchingDuration** [int]
+* **service_patching_duration** [int]

-   The number of steps to take when patching a service
+    The number of steps to take when patching a service

-* **fileSystemRepairingLimit** [int]:
+* **file_system_repairing_limit** [int]

-   The number of steps to take when repairing the file system
+    The number of steps to take when repairing the file system

-* **fileSystemRestoringLimit** [int]
+* **file_system_restoring_limit** [int]

-   The number of steps to take when restoring the file system
+    The number of steps to take when restoring the file system

-* **fileSystemScanningLimit** [int]
+* **file_system_scanning_limit** [int]

-   The number of steps to take when scanning the file system
+    The number of steps to take when scanning the file system

-config_[name].yaml:
+* **deterministic** [bool]
+
+   Set to true if the agent evaluation should be deterministic. Default is ``False``
+
+* **seed** [int]
+
+   Seed used in the randomisation in agent training. Default is ``None``
+
+The Lay Down Config
 *******************

-The config_[name].yaml file consists of the following attributes:
+The lay down config file consists of the following attributes:

-* **itemType: ACTIONS** [enum]
-
-   Determines whether a NODE or ACL action space format is adopted for the session

 * **itemType: STEPS** [int]

-   Determines the number of steps to run in each episode of the session
+* **item_type: PORTS** [int]

-* **itemType: PORTS** [int]
+    Provides a list of ports modelled in this session

-   Provides a list of ports modelled in this session
+* **item_type: SERVICES** [freetext]

-* **itemType: SERVICES** [freetext]
+    Provides a list of services modelled in this session

-   Provides a list of services modelled in this session
+* **item_type: NODE**

-* **itemType: NODE**
-
-   Defines a node included in the system laydown being simulated. It should consist of the following attributes:
+    Defines a node included in the system laydown being simulated. It should consist of the following attributes:

     * **id** [int]: Unique ID for this YAML item
     * **name** [freetext]: Human-readable name of the component
-     * **baseType** [enum]: Relates to the base type of the node. Can be SERVICE, ACTIVE or PASSIVE. PASSIVE nodes do not have an operating system or services. ACTIVE nodes have an operating system, but no services. SERVICE nodes have both an operating system and one or more services
-     * **nodeType** [enum]: Relates to the component type. Can be one of CCTV, SWITCH, COMPUTER, LINK, MONITOR, PRINTER, LOP, RTU, ACTUATOR or SERVER
+     * **node_class** [enum]: Relates to the base type of the node. Can be SERVICE, ACTIVE or PASSIVE. PASSIVE nodes do not have an operating system or services. ACTIVE nodes have an operating system, but no services. SERVICE nodes have both an operating system and one or more services
+     * **node_type** [enum]: Relates to the component type. Can be one of CCTV, SWITCH, COMPUTER, LINK, MONITOR, PRINTER, LOP, RTU, ACTUATOR or SERVER
     * **priority** [enum]: Provides a priority for each node. Can be one of P1, P2, P3, P4 or P5 (with P1 being the highest)
-     * **hardwareState** [enum]: The initial hardware state of the node. Can be one of ON, OFF or RESETTING
-     * **ipAddress** [IP address]: The IP address of the component in format xxx.xxx.xxx.xxx
-     * **softwareState** [enum]: The intial state of the node operating system. Can be GOOD, PATCHING or COMPROMISED
-     * **fileSystemState** [enum]: The initial state of the node file system. Can be GOOD, CORRUPT, DESTROYED, REPAIRING or RESTORING
+     * **hardware_state** [enum]: The initial hardware state of the node. Can be one of ON, OFF or RESETTING
+     * **ip_address** [IP address]: The IP address of the component in format xxx.xxx.xxx.xxx
+     * **software_state** [enum]: The initial state of the node operating system. Can be GOOD, PATCHING or COMPROMISED
+     * **file_system_state** [enum]: The initial state of the node file system. Can be GOOD, CORRUPT, DESTROYED, REPAIRING or RESTORING
     * **services**: For each service associated with the node:

        * **name** [freetext]: Free-text name of the service, but must match one of the services defined for the system in the services list
        * **port** [int]: Integer value of the port related to this service, but must match one of the ports defined for the system in the ports list
        * **state** [enum]: The initial state of the service. Can be one of GOOD, PATCHING, COMPROMISED or OVERWHELMED

-* **itemType: LINK**
+* **item_type: LINK**

-   Defines a link included in the system laydown being simulated. It should consist of the following attributes:
+    Defines a link included in the system laydown being simulated. It should consist of the following attributes:

     * **id** [int]: Unique ID for this YAML item
     * **name** [freetext]: Human-readable name of the component
@@ -329,63 +420,63 @@ The config_[name].yaml file consists of the following attributes:
     * **source** [int]: The ID of the source node
     * **destination** [int]: The ID of the destination node

-* **itemType: GREEN_IER**
+* **item_type: GREEN_IER**

-   Defines a green agent Information Exchange Requirement (IER). It should consist of:
+    Defines a green agent Information Exchange Requirement (IER). It should consist of:

     * **id** [int]: Unique ID for this YAML item
-     * **startStep** [int]: The start step (in the episode) for this IER to begin
-     * **endStep** [int]: The end step (in the episode) for this IER to finish
+     * **start_step** [int]: The start step (in the episode) for this IER to begin
+     * **end_step** [int]: The end step (in the episode) for this IER to finish
     * **load** [int]: The load (in bits/s) for this IER to apply to links
     * **protocol** [freetext]: The protocol to apply to the links. This must match a value in the services list
     * **port** [int]: The port that the protocol is running on. This must match a value in the ports list
     * **source** [int]: The ID of the source node
     * **destination** [int]: The ID of the destination node
-     * **missionCriticality** [enum]: The mission criticality of this IER (with 5 being highest, 1 lowest)
+     * **mission_criticality** [enum]: The mission criticality of this IER (with 5 being highest, 1 lowest)

-* **itemType: RED_IER**
+* **item_type: RED_IER**

-   Defines a red agent Information Exchange Requirement (IER). It should consist of:
+    Defines a red agent Information Exchange Requirement (IER). It should consist of:

     * **id** [int]: Unique ID for this YAML item
-     * **startStep** [int]: The start step (in the episode) for this IER to begin
-     * **endStep** [int]: The end step (in the episode) for this IER to finish
+     * **start_step** [int]: The start step (in the episode) for this IER to begin
+     * **end_step** [int]: The end step (in the episode) for this IER to finish
     * **load** [int]: The load (in bits/s) for this IER to apply to links
     * **protocol** [freetext]: The protocol to apply to the links. This must match a value in the services list
     * **port** [int]: The port that the protocol is running on. This must match a value in the ports list
     * **source** [int]: The ID of the source node
     * **destination** [int]: The ID of the destination node
-     * **missionCriticality** [enum]: Not currently used. Default to 0
+     * **mission_criticality** [enum]: Not currently used. Default to 0

-* **itemType: GREEN_POL**
+* **item_type: GREEN_POL**

    Defines a green agent pattern-of-life instruction. It should consist of:

      * **id** [int]: Unique ID for this YAML item
-      * **startStep** [int]: The start step (in the episode) for this PoL to begin
-      * **endStep** [int]: Not currently used. Default to same as start step
+      * **start_step** [int]: The start step (in the episode) for this PoL to begin
+      * **end_step** [int]: Not currently used. Default to same as start step
      * **nodeId** [int]: The ID of the node to apply the PoL to
      * **type** [enum]: The type of PoL to apply. Can be one of OPERATING, OS or SERVICE
      * **protocol** [freetext]: The protocol to be affected if SERVICE type is chosen. Must match a value in the services list
-      * **state** [enuum]: The state to apply to the node (which represents the PoL change). Can be one of ON, OFF or RESETTING (for node state) or GOOD, PATCHING or COMPROMISED (for operating system state) or GOOD, PATCHING, COMPROMISED or OVERWHELMED (for service state)
+      * **state** [enum]: The state to apply to the node (which represents the PoL change). Can be one of ON, OFF or RESETTING (for node state) or GOOD, PATCHING or COMPROMISED (for Software State) or GOOD, PATCHING, COMPROMISED or OVERWHELMED (for service state)

-* **itemType: RED_POL**
+* **item_type: RED_POL**

    Defines a red agent pattern-of-life instruction. It should consist of:

      * **id** [int]: Unique ID for this YAML item
-      * **startStep** [int]: The start step (in the episode) for this PoL to begin
-      * **endStep** [int]: Not currently used. Default to same as start step
+      * **start_step** [int]: The start step (in the episode) for this PoL to begin
+      * **end_step** [int]: Not currently used. Default to same as start step
      * **targetNodeId** [int]: The ID of the node to apply the PoL to
      * **initiator** [enum]: What initiates the PoL. Can be DIRECT, IER or SERVICE
      * **type** [enum]: The type of PoL to apply. Can be one of OPERATING, OS or SERVICE
      * **protocol** [freetext]: The protocol to be affected if SERVICE type is chosen. Must match a value in the services list
-      * **state** [enum]: The state to apply to the node (which represents the PoL change). Can be one of ON, OFF or RESETTING (for node state) or GOOD, PATCHING or COMPROMISED (for operating system state) or GOOD, PATCHING, COMPROMISED or OVERWHELMED (for service state) or GOOD, CORRUPT, DESTROYED, REPAIRING or RESTORING (for file system state)
+      * **state** [enum]: The state to apply to the node (which represents the PoL change). Can be one of ON, OFF or RESETTING (for node state) or GOOD, PATCHING or COMPROMISED (for Software State) or GOOD, PATCHING, COMPROMISED or OVERWHELMED (for service state) or GOOD, CORRUPT, DESTROYED, REPAIRING or RESTORING (for file system state)
      * **sourceNodeId** [int]: The ID of the source node containing the service to check (used for SERVICE initiator)
      * **sourceNodeService** [freetext]: The service on the source node to check (used for SERVICE initiator). Must match a value in the services list for this node
      * **sourceNodeServiceState** [enum]: The state of the source node service to check (used for SERVICE initiator). Can be one of GOOD, PATCHING, COMPROMISED or OVERWHELMED

-* **itemType: ACL_RULE**
+* **item_type: ACL_RULE**

    Defines an initial Access Control List (ACL) rule. It should consist of:

@@ -395,3 +486,4 @@ The config_[name].yaml file consists of the following attributes:
      * **destination** [IP address]: Defines the destination IP address for the rule in xxx.xxx.xxx.xxx format
      * **protocol** [freetext]: Defines the protocol for the rule. Must match a value in the services list
      * **port** [int]: Defines the port for the rule. Must match a value in the ports list
+      * **position** [int]: Defines where to place the ACL rule in the list. Rules with a lower index (i.e. higher up in the list) are checked first. Indexing starts at 0 (as in Python).
--- a/docs/source/custom_agent.rst
+++ b/docs/source/custom_agent.rst
@@ -0,0 +1,142 @@
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+Custom Agents
+=============
+
+
+Integrating a user defined blue agent
+*************************************
+
+.. note::
+
+    If you are planning to implement custom RL agents into PrimAITE, you must use the project as a repository. If you install PrimAITE as a python package from wheel, custom agents are not supported.
+
+PrimAITE has integration with Ray RLlib and StableBaselines3 agents. All agents interface with PrimAITE through an :py:class:`Agent Session <primaite.agents.agent.AgentSessionABC>` which provides Input/Output of agent savefiles, as well as capturing and plotting performance metrics during training and evaluation. If you wish to integrate a custom blue agent, it is recommended to create a subclass of the :py:class:`primaite.agents.agent.AgentSessionABC` and implement the ``__init__()``, ``_setup()``, ``_save_checkpoint()``, ``learn()``, ``evaluate()``, ``_get_latest_checkpoint()``, ``load()``, and ``save()`` methods.
+
+Below is a barebones example of a custom agent implementation:
+
+.. code:: python
+
+    # src/primaite/agents/my_custom_agent.py
+
+    from primaite.agents.agent import AgentSessionABC
+    from primaite.common.enums import AgentFramework, AgentIdentifier
+
+    class CustomAgent(AgentSessionABC):
+        def __init__(self, training_config_path, lay_down_config_path):
+            super().__init__(training_config_path, lay_down_config_path)
+            assert self._training_config.agent_framework == AgentFramework.CUSTOM
+            assert self._training_config.agent_identifier == AgentIdentifier.CUSTOM_AGENT
+            self._setup()
+
+        def _setup(self):
+            super()._setup()
+            self._env = Primaite(
+                training_config_path=self._training_config_path,
+                lay_down_config_path=self._lay_down_config_path,
+                session_path=self.session_path,
+                timestamp_str=self.timestamp_str,
+            )
+            self._agent = ... # your code to setup agent
+
+        def _save_checkpoint(self):
+            checkpoint_num = self._training_config.checkpoint_every_n_episodes
+            episode_count = self._env.episode_count
+            save_checkpoint = False
+            if checkpoint_num:
+                save_checkpoint = episode_count % checkpoint_num == 0
+            # saves checkpoint if the episode count is not 0 and save_checkpoint flag was set to true
+            if episode_count and save_checkpoint:
+                ...
+                # your code to save checkpoint goes here.
+                # The path should start with self.checkpoints_path and include the episode number.
+
+        def learn(self):
+            ...
+            # call your agent's learning function here.
+
+            super().learn() # this will finalise learning and output session metadata
+            self.save()
+
+        def evaluate(self):
+            ...
+            # call your agent's evaluation function here.
+
+            self._env.close()
+            super().evaluate()
+
+        def _get_latest_checkpoint(self):
+            ...
+            # Load an agent from file.
+
+        @classmethod
+        def load(cls, path):
+            ...
+            # Create a CustomAgent object which loads model weights from file.
+
+        def save(self):
+            ...
+            # Call your agent's function that saves it to a file
+
+
+You will also need to modify :py:class:`PrimaiteSession <primaite.primaite_session.PrimaiteSession>` and :py:mod:`primaite.common.enums` to capture your new agent identifiers.
+
+.. code-block:: python
+    :emphasize-lines: 17, 18
+
+    # src/primaite/common/enums.py
+
+    class AgentIdentifier(Enum):
+        """The Red Agent algo/class."""
+        A2C = 1
+        "Advantage Actor Critic"
+        PPO = 2
+        "Proximal Policy Optimization"
+        HARDCODED = 3
+        "The Hardcoded agents"
+        DO_NOTHING = 4
+        "The DoNothing agents"
+        RANDOM = 5
+        "The RandomAgent"
+        DUMMY = 6
+        "The DummyAgent"
+        CUSTOM_AGENT = 7
+        "Your custom agent"
+
+.. code-block:: python
+    :emphasize-lines: 3, 11, 12
+
+    # src/primaite/primaite_session.py
+
+    from primaite.agents.my_custom_agent import CustomAgent
+
+    # ...
+
+    def setup(self):
+        """Performs the session setup."""
+        if self._training_config.agent_framework == AgentFramework.CUSTOM:
+            _LOGGER.debug(f"PrimaiteSession Setup: Agent Framework = {AgentFramework.CUSTOM}")
+            if self._training_config.agent_identifier == AgentIdentifier.CUSTOM_AGENT:
+                self._agent_session = CustomAgent(self._training_config_path, self._lay_down_config_path)
+            if self._training_config.agent_identifier == AgentIdentifier.HARDCODED:
+                _LOGGER.debug(f"PrimaiteSession Setup: Agent Identifier =" f" {AgentIdentifier.HARDCODED}")
+                if self._training_config.action_type == ActionType.NODE:
+                    # Deterministic Hardcoded Agent with Node Action Space
+                    self._agent_session = HardCodedNodeAgent(self._training_config_path, self._lay_down_config_path)
+
+Finally, specify your agent in your training config.
+
+.. code-block:: yaml
+
+    # ~/primaite/2.0.0/config/path/to/your/config_main.yaml
+
+    # Training Config File
+
+    agent_framework: CUSTOM
+    agent_identifier: CUSTOM_AGENT
+    random_red_agent: False
+    # ...
+
+Now you can :ref:`run a primaite session<run a primaite session>` with your custom agent by passing in the custom ``config_main``.
--- a/docs/source/dependencies.rst
+++ b/docs/source/dependencies.rst
@@ -1,24 +1,14 @@
-.. _dependencies:
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+.. role::  raw-html(raw)
+    :format: html
+
+Dependencies
+============

 PrimAITE Dependencies
-=====================
+---------------------

-PrimAITE is built with the following versions of dependencies:
-
-* Python 3.10.9
-* PyYAML 6.0
-* numpy 1.23.5
-* networkx 2.8.8
-* gym 0.21.0
-* matplotlib 3.6.2
-* stable_baselines_3 1.6.2
-
-The latest release of PrimAITE has been tested against the following versions of dependencies:
-
-* Python 3.10.9
-* PyYAML 6.0
-* numpy 1.23.5
-* networkx 2.8.8
-* gym 0.21.0
-* matplotlib 3.6.2
-* stable_baselines_3 1.6.2
+.. include:: primaite-dependencies.rst
--- a/docs/source/getting_started.rst
+++ b/docs/source/getting_started.rst
@@ -0,0 +1,155 @@
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+.. _getting-started:
+
+Getting Started
+===============
+
+**Getting Started with PrimAITE**
+
+Pre-Requisites
+
+In order to get **PrimAITE** installed, you will need to have a python version between 3.8 and 3.10 installed. If you don't already have it, this is how to install it:
+
+
+.. tabs:: lang
+
+    .. code-tab:: bash
+        :caption: Unix
+
+        sudo add-apt-repository ppa:deadsnakes/ppa
+        sudo apt install python3.10
+        sudo apt-get install python3-pip
+        sudo apt-get install python3-venv
+
+    .. code-tab:: text
+        :caption: Windows (Powershell)
+
+        - Manual install from: https://www.python.org/downloads/release/python-31011/
+
+**PrimAITE** is designed to be OS-agnostic, and thus should work on most variations/distros of Linux, Windows, and MacOS.
+
+Install PrimAITE
+****************
+
+1. Create a primaite directory in your home directory:
+
+.. tabs:: lang
+
+    .. code-tab:: bash
+        :caption: Unix
+
+        mkdir -p ~/primaite/2.0.0
+
+    .. code-tab:: powershell
+        :caption: Windows (Powershell)
+
+        mkdir ~\primaite\2.0.0
+
+2. Navigate to the primaite directory and create a new python virtual environment (venv)
+
+.. tabs:: lang
+
+    .. code-tab:: bash
+        :caption: Unix
+
+        cd ~/primaite/2.0.0
+        python3 -m venv .venv
+
+    .. code-tab:: powershell
+        :caption: Windows (Powershell)
+
+        cd ~\primaite\2.0.0
+        python3 -m venv .venv
+        attrib +h .venv /s /d # Hides the .venv directory
+
+3. Activate the venv
+
+.. tabs:: lang
+
+    .. code-tab:: bash
+        :caption: Unix
+
+        source .venv/bin/activate
+
+    .. code-tab:: powershell
+        :caption: Windows (Powershell)
+
+        .\.venv\Scripts\activate
+
+
+4. Install PrimAITE using pip from PyPI
+
+.. tabs:: lang
+
+    .. code-tab:: bash
+        :caption: Unix
+
+        pip install primaite
+
+    .. code-tab:: powershell
+        :caption: Windows (Powershell)
+
+        pip install primaite
+
+5. Perform the PrimAITE setup
+
+.. tabs:: lang
+
+    .. code-tab:: bash
+        :caption: Unix
+
+        primaite setup
+
+    .. code-tab:: powershell
+        :caption: Windows (Powershell)
+
+        primaite setup
+
+Clone & Install PrimAITE for Development
+****************************************
+
+To be able to extend PrimAITE further, or to build wheels manually before install, clone the repository to a location
+of your choice:
+
+.. TODO:: Add repo path once we know what it is
+
+.. code-block:: bash
+
+    git clone <repo path>
+    cd primaite
+
+Create and activate your Python virtual environment (venv)
+
+.. tabs:: lang
+
+    .. code-tab:: bash
+        :caption: Unix
+
+        python3 -m venv venv
+        source venv/bin/activate
+
+    .. code-tab:: powershell
+        :caption: Windows (Powershell)
+
+        python3 -m venv venv
+        .\venv\Scripts\activate
+
+Install PrimAITE with the dev extra
+
+.. tabs:: lang
+
+    .. code-tab:: bash
+        :caption: Unix
+
+        pip install -e .[dev]
+
+    .. code-tab:: powershell
+        :caption: Windows (Powershell)
+
+        pip install -e .[dev]
+
+
+To view the complete list of packages installed during PrimAITE installation, go to the dependencies page (:doc:`dependencies`).
--- a/docs/source/glossary.rst
+++ b/docs/source/glossary.rst
@@ -0,0 +1,81 @@
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+Glossary
+=============
+
+.. glossary::
+    :sorted:
+
+    Network
+        The network in primaite is a logical representation of a computer network containing :term:`Nodes<Node>` and :term:`Links<Link>`.
+
+    Node
+        A Node represents a network endpoint. For example a computer, server, switch, or an actuator.
+
+    Link
+        A Link represents the connection between two Nodes. For example, a physical wire between a computer and a switch or a wireless connection.
+
+    Protocol
+        Protocols are used by links to separate different types of network traffic. Common examples would be HTTP, TCP, and UDP.
+
+    Service
+        A service represents a piece of software that is installed on a node, such as a web server or a database.
+
+    Access Control List
+        PrimAITE blocks or allows certain traffic on the network by simulating firewall rules, which are defined in the Access Control List.
+
+    Agent
+        An agent is a representation of a user of the network. Typically this would be a user that is using one of the computer nodes, though it could be an autonomous agent.
+
+    Green agent
+        Simulates typical benign activity on the network, such as real users using computers and servers.
+
+    Red Agent
+        An agent that is aiming to attack the network in some way, for example by executing a Denial-Of-Service attack or stealing data.
+
+    Blue Agent
+        A defensive agent that protects the network from Red Agent attacks to minimise disruption to green agents and protect data.
+
+    Information Exchange Requirement (IER)
+        Simulates network traffic by sending data from one network node to another via links for a specified amount of time. IERs can be part of green agent behaviour or red agent behaviour. PrimAITE can be configured to apply a penalty for green agents' IERs being blocked and a reward for red agents' IERs being blocked.
+
+    Pattern-of-Life (PoL)
+        PoLs allow agents to change the current hardware, OS, file system, or service statuses of nodes during the course of an episode. For example, a green agent may restart a server node to represent scheduled maintenance. A red agent's Pattern-of-Life can be used to attack nodes by changing their states to CORRUPT or COMPROMISED.
+
+    Reward
+        The reward is a single number used by the blue agent to understand whether it's performing well or poorly. RL agents change their behaviour in an attempt to increase the expected reward each episode. The reward is generated based on the current states of the environment / :term:`reference environment` and is impacted positively by things like green IERs running successfully and negatively by things like nodes being compromised.
+
+    Observation
+        An observation is a representation of the current state of the environment that is given to the learning agent so it can decide on which action to perform. If the environment is 'fully observable', the observation contains information about every possible aspect of the environment. More commonly, the environment is 'partially observable' which means the learning agent has to make decisions without knowing every detail of the current environment state.
+
+    Action
+        The learning agent decides on an action to take on every step in the simulation. The action has the chance to positively or negatively impact the environment state. Over time, the agent aims to learn which actions to take when to maximise the expected reward.
+
+    Training
+        During training, an RL agent is placed in the simulated network and it learns which actions to take in which scenarios to obtain maximum reward.
+
+    Evaluation
+        During evaluation, an RL agent acts on the simulated network but it is not allowed to update its behaviour. Evaluation is used to assess how successful agents are at defending the network.
+
+    Step
+        The agents can only act in the environment at discrete intervals. The time step is the basic unit of time in the simulation. At each step, the RL agent has an opportunity to observe the state of the environment and decide an action. Steps are also used for updating states for time-dependent activities such as rebooting a node.
+
+    Episode
+        When an episode starts, the network simulation is reset to an initial state. The agents take actions on each step of the episode until it reaches a terminal state, which usually happens after a predetermined number of steps. After the terminal state is reached, a new episode starts and the RL agent has another opportunity to protect the network.
+
+    Reference environment
+        While the network simulation is unfolding, a parallel simulation takes place which is identical to the main one except that blue and red agent actions are not applied. This reference environment essentially shows what would be happening to the network if there had been no cyberattack or defense. The reference environment is used to calculate rewards.
+
+    Transaction
+        PrimAITE records the decisions of the learning agent by saving its observation, action, and reward at every time step. During each session, this data is saved to disk to allow for full inspection.
+
+    Laydown
+        The laydown is a file which defines the training scenario. It contains the network topology, firewall rules, services, protocols, and details about green and red agent behaviours.
+
+    Gym
+        PrimAITE uses the Gym reinforcement learning framework API to create a training environment and interface with RL agents. Gym defines a common way of creating observations, actions, and rewards.
+
+    User app home
+        PrimAITE supports upgrading software version while retaining user data. The user data directory is where configs, notebooks, and results are stored; this location is ``~/primaite/<version>`` on Linux/macOS and ``C:\Users\<username>\primaite\<version>`` on Windows.
--- a/docs/source/migration_1.2_-_2.0.rst
+++ b/docs/source/migration_1.2_-_2.0.rst
@@ -0,0 +1,57 @@
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+v1.2 to v2.0 Migration guide
+============================
+
+**1. Installing PrimAITE**
+
+    Like before, you can install primaite from the repository by running ``pip install -e .``. But, there is now an additional setup step which does several things, like setting up user directories, copying default configs and notebooks, etc. Once you have installed PrimAITE to your virtual environment, run this command to finalise setup.
+
+    .. code-block:: bash
+
+        primaite setup
+
+**2. Running a training session**
+
+    In version 1.2 of PrimAITE, the main entry point for training or evaluating agents was the ``src/primaite/main.py`` file. v2.0.0 introduced managed 'sessions' which are responsible for reading configuration files, performing training, and writing outputs.
+
+    The ``main.py`` file still runs a training session but it now uses the new ``PrimaiteSession``, and it now requires you to provide the path to your config files.
+
+    .. code-block:: bash
+
+        python src/primaite/main.py --tc path/to/training-config.yaml --ldc path/to/laydown-config.yaml
+
+    Alternatively, the session can be invoked via the commandline by running:
+
+    .. code-block:: bash
+
+        primaite session --tc path/to/training-config.yaml --ldc path/to/laydown-config.yaml
+
+**3. Location of configs**
+
+    In version 1.2, training configs and laydown configs were all stored in the project repository under ``src/primaite/config``. Version 2.0.0 introduced user data directories, and now when you install and setup PrimAITE, config files are stored in your user data location. On Linux/OSX, this is stored in ``~/primaite/2.0.0/config``. On Windows, this is stored in ``C:\Users\<your username>\primaite\2.0.0\config``. Upon first setup, the config folder is populated with some default yaml files. It is recommended that you store all your custom configuration files here.
+
+**4. Contents of configs**
+
+    Some things that were previously part of the laydown config are now part of the training config.
+
+        * Actions
+
+    If you have custom configs which use these, you will need to adapt them by moving the configuration from the laydown config to the training config.
+
+    Also, there are new configurable items in the training config:
+
+        * Observations
+        * Agent framework
+        * Agent
+        * Deep learning framework
+        * random red agents
+        * seed
+        * deterministic
+        * hard coded agent view
+
+    Each of these items has a default value which is designed so that PrimAITE has the same behaviour as it did in 1.2.0, so you do not have to specify them.
+
+    ACL Rules in laydown configs have a new required parameter: ``position``. The lower the position, the higher up in the ACL table the rule will be placed. If you have custom laydowns, you will need to go through them and add a position to each ACL_RULE.
--- a/docs/source/primaite_session.rst
+++ b/docs/source/primaite_session.rst
@@ -0,0 +1,182 @@
+.. only:: comment
+
+    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+.. _run a primaite session:
+
+Run a PrimAITE Session
+======================
+
+Run
+---
+
+A PrimAITE session can be run either with the ``primaite session`` command from the cli
+(See :func:`primaite.cli.session`), or by calling :func:`primaite.main.run` from a Python terminal or Jupyter Notebook.
+Both the ``primaite session`` and :func:`primaite.main.run` take a training config and a lay down config as parameters.
+
+
+.. tabs::
+
+    .. code-tab:: bash
+        :caption: Unix CLI
+
+        cd ~/primaite/2.0.0
+        source ./.venv/bin/activate
+        primaite session --tc ./config/my_training_config.yaml --ldc ./config/my_lay_down_config.yaml
+
+    .. code-tab:: powershell
+        :caption: Powershell CLI
+
+        cd ~\primaite\2.0.0
+        .\.venv\Scripts\activate
+        primaite session --tc .\config\my_training_config.yaml --ldc .\config\my_lay_down_config.yaml
+
+
+    .. code-tab:: python
+        :caption: Python
+
+        from primaite.main import run
+
+        training_config = <path to training config yaml file>
+        lay_down_config = <path to lay down config yaml file>
+        run(training_config, lay_down_config)
+
+When a session is run, a session output sub-directory is created in the user's app sessions directory (``~/primaite/2.0.0/sessions``).
+The sub-directory is formatted as such: ``~/primaite/2.0.0/sessions/<yyyy-mm-dd>/<yyyy-mm-dd>_<hh-mm-ss>/``
+
+For example, when running a session at 17:30:00 on 31st January 2023, the session will output to:
+``~/primaite/2.0.0/sessions/2023-01-31/2023-01-31_17-30-00/``.
+
+``primaite session`` can be run in the terminal/command prompt without arguments. It will use the default configs in the directory ``primaite/config/example_config``.
+
+
+Outputs
+-------
+
+PrimAITE produces four types of outputs:
+
+* Session Metadata
+* Results
+* Diagrams
+* Saved agents (training checkpoints and a final trained agent)
+
+
+**Session Metadata**
+
+PrimAITE creates a ``session_metadata.json`` file that contains the following metadata:
+
+    * **uuid** - The UUID assigned to the session upon instantiation.
+    * **start_datetime** - The date & time the session started in iso format.
+    * **end_datetime** - The date & time the session ended in iso format.
+    * **learning**
+        * **total_episodes** - The total number of training episodes completed.
+        * **total_time_steps** - The total number of training time steps completed.
+    * **evaluation**
+        * **total_episodes** - The total number of evaluation episodes completed.
+        * **total_time_steps** - The total number of evaluation time steps completed.
+    * **env**
+        * **training_config**
+            * **All training config items**
+        * **lay_down_config**
+            * **All lay down config items**
+
+
+**Results**
+
+PrimAITE automatically creates two sets of results from each learning and evaluation session:
+
+* Average reward per episode - a csv file listing the average reward for each episode of the session. This provides, for example, an indication of the change over a training session of the reward value
+* All transactions - a csv file listing the following values for every step of every episode:
+
+    * Timestamp
+    * Episode number
+    * Step number
+    * Reward value
+    * Action taken (as presented by the blue agent on this step). Individual elements of the action space are presented in the format AS_X
+    * Initial observation space (what the blue agent observed when it decided its action)
+
+**Diagrams**
+
+* For each session, PrimAITE automatically creates a visualisation of the system / network lay down configuration.
+* For each learning and evaluation task within the session, PrimAITE automatically plots the average reward per episode using Plotly and saves it to the learning or evaluation subdirectory in the session directory.
+
+**Saved agents**
+
+For each training session, assuming the agent being trained implements the *save()* function and this function is called by the code, PrimAITE automatically saves the agent state.
+
+**Example Session Directory Structure**
+
+.. code-block:: text
+
+    ~/
+    └── primaite/
+        └── 2.0.0/
+            └── sessions/
+                └── 2023-07-18/
+                    └── 2023-07-18_11-06-04/
+                        ├── evaluation/
+                        │   ├── all_transactions_2023-07-18_11-06-04.csv
+                        │   ├── average_reward_per_episode_2023-07-18_11-06-04.csv
+                        │   └── average_reward_per_episode_2023-07-18_11-06-04.png
+                        ├── learning/
+                        │   ├── all_transactions_2023-07-18_11-06-04.csv
+                        │   ├── average_reward_per_episode_2023-07-18_11-06-04.csv
+                        │   ├── average_reward_per_episode_2023-07-18_11-06-04.png
+                        │   ├── checkpoints/
+                        │   │   └── sb3ppo_10.zip
+                        │   ├── SB3_PPO.zip
+                        │   └── tensorboard_logs/
+                        │       ├── PPO_1/
+                        │       │   └── events.out.tfevents.1689674765.METD-9PMRFB3.42960.0
+                        │       ├── PPO_2/
+                        │       │   └── events.out.tfevents.1689674766.METD-9PMRFB3.42960.1
+                        │       ├── PPO_3/
+                        │       │   └── events.out.tfevents.1689674766.METD-9PMRFB3.42960.2
+                        │       ├── PPO_4/
+                        │       │   └── events.out.tfevents.1689674767.METD-9PMRFB3.42960.3
+                        │       ├── PPO_5/
+                        │       │   └── events.out.tfevents.1689674767.METD-9PMRFB3.42960.4
+                        │       ├── PPO_6/
+                        │       │   └── events.out.tfevents.1689674768.METD-9PMRFB3.42960.5
+                        │       ├── PPO_7/
+                        │       │   └── events.out.tfevents.1689674768.METD-9PMRFB3.42960.6
+                        │       ├── PPO_8/
+                        │       │   └── events.out.tfevents.1689674769.METD-9PMRFB3.42960.7
+                        │       ├── PPO_9/
+                        │       │   └── events.out.tfevents.1689674770.METD-9PMRFB3.42960.8
+                        │       └── PPO_10/
+                        │           └── events.out.tfevents.1689674770.METD-9PMRFB3.42960.9
+                        ├── network_2023-07-18_11-06-04.png
+                        └── session_metadata.json
+
+Loading a session
+-----------------
+
+A previous session can be loaded by providing the **directory** of the previous session to either the ``primaite session`` command from the cli
+(See :func:`primaite.cli.session`), or by calling :func:`primaite.main.run` with session_path.
+
+.. tabs::
+
+    .. code-tab:: bash
+        :caption: Unix CLI
+
+        cd ~/primaite/2.0.0
+        source ./.venv/bin/activate
+        primaite session --load "path/to/session"
+
+    .. code-tab:: powershell
+        :caption: Powershell CLI
+
+        cd ~\primaite\2.0.0
+        .\.venv\Scripts\activate
+        primaite session --load "path\to\session"
+
+
+    .. code-tab:: python
+        :caption: Python
+
+        from primaite.main import run
+
+        run(session_path=<previous session directory>)
+
+When PrimAITE runs a loaded session, it writes its output to the provided session directory.
--- a/docs/source/results.rst
+++ b/docs/source/results.rst
@@ -1,42 +0,0 @@
-.. _results:
-
-Results, Output and Logging from PrimAITE
-=========================================
-
-PrimAITE produces four types of data:
-
-* Outputs - Results
-* Outputs - Diagrams
-* Outputs - Saved agents
-* Logging
-
-Outputs can be found in the *[Install Directory]\\Primaite\\Primaite\\outputs* directory
-
-Logging can be found in the *[Install Directory]\\Primaite\\Primaite\\logs* directory
-
-**Outputs - Results**
-
-PrimAITE automatically creates two sets of results from each session, and stores them in the *Results* folder:
-
-* Average reward per episode - a csv file listing the average reward for each episode of the session. This provides, for example, an indication of the change over a training session of the reward value
-* All transactions - a csv file listing the following values for every step of every episode:
-
-	* Timestamp
-	* Episode number
-	* Step number
-	* Initial observation space (before red and blue agent actions have been taken). Individual elements of the observation space are presented in the format OSI_X_Y
-	* Resulting observation space (after the red and blue agent actions have been taken) Individual elements of the observation space are presented in the format OSN_X_Y
-	* Reward value
-	* Action space (as presented by the blue agent on this step). Individual elements of the action space are presented in the format AS_X
-
-**Outputs - Diagrams**
-
-For each session, PrimAITE automatically creates a visualisation of the system / network laydown configuration, and stores it in the *Diagrams* folder.
-
-**Outputs - Saved agents**
-
-For each training session, assuming the agent being trained implements the *save()* function and this function is called by the code, PrimAITE automatically saves the agent state and stores it in the *agents* folder.
-
-**Logging**
-
-PrimAITE also provides output logs (for diagnosis) using the Python Logging package. These can be found in the *[Install Directory]\\Primaite\\Primaite\\logs* directory
--- a/docs/source/session.rst
+++ b/docs/source/session.rst
@@ -1,86 +0,0 @@
-.. _session:
-
-Running a PrimAITE Training or Evaluation Session
-=================================================
-
-The application will determine whether a Training or Evaluation session is being executed via the 'sessionType' value in the config_mail.yaml file. A PrimAITE session will usually be associated with a "Use Case Profile"; this document will present:
-
-* The Use Case name, default number of steps in an episode and default number of episodes in a session. The number of steps and episodes can be modified in the configuration files
-* The system laydown being modelled
-* The objectives of the session (steady-state), the red agent and the blue agent (in a defensive role)
-* The green agent pattern-of-life profile
-* The red agent attack profile
-* The observation space definition
-* The action space definition
-* Agent integration guidance
-* Initial Access Control List settings (if applicable)
-* The reward function definition
-
-**Integrating a user defined blue agent**
-
-Integrating a blue agent with PrimAITE requires some modification of the code within the main.py file. The main.py file consists of a number of functions, each of which will invoke training for a particular agent. These are:
-
-* Generic (run_generic)
-* Stable Baselines 3 PPO (run_stable_baselines3_ppo)
-* Stable Baselines 3 A2C (run_stable_baselines3_a2c)
-
-The selection of which agent type to use is made via the config_main.yaml file. In order to train a user generated agent,
-the run_generic function should be selected, and should be modified (typically) to be:
-
-.. code:: python
-
-    agent = MyAgent(environment, max_steps)
-    for episode in range(0, num_episodes):
-        agent.learn()      
-    env.close()
-    save_agent(agent)
-
-Where:
-
-* *MyAgent* is the user created agent
-* *environment* is the PrimAITE environment
-* *max_steps* is the number of steps in an episode, as defined in the config_[name].yaml file
-* *num_episodes* is the number of episodes in the session, as defined in the config_main.yaml file
-* the *.learn()* function should be defined in the user created agent
-* the *env.close()* function is defined within PrimAITE
-* the *save_agent()* assumes that a *save()* function has been defined in the user created agent. If not, this line can be ommitted (although it is encouraged, since it will allow the agent to be saved and ported)
-
-The code below provides a suggested format for the learn() function within the user created agent.
-It's important to include the *self.environment.reset()* call within the episode loop in order that the
-environment is reset between episodes. Note that the example below should not be considered exhaustive.
-
-.. code:: python
-
-    def learn(self) :
-
-    # pre-reqs
-
-    # reset the environment
-    self.environment.reset()
-    done = False
-
-    for step in range(max_steps):
-        # calculate the action
-        action = ...
-
-        # execute the environment step
-        new_state, reward, done, info = self.environment.step(action)
-
-        # algorithm updates
-        ...
-
-        # update to our new state
-        state = new_state
-
-        # if done, finish episode
-        if done == True:
-            break
-
-**Running the session**
-
-In order to execute a session, carry out the following steps:
-
-1. Navigate to "[Install directory]\\Primaite\\Primaite\\”
-2. Start a console window (type “CMD” in path window, or start a console window first and navigate to “[Install Directory]\\Primaite\\Primaite\\”)
-3. Type “python main.py”
-4. The session will start with an output indicating the current episode, and average reward value for the episode
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,13 +5,13 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "primaite"
 description = "PrimAITE (Primary-level AI Training Environment) is a simulation environment for training AI under the ARCD programme."
-authors = [{name="QinetiQ Training and Simulation Ltd"}]
-license = {text = "MIT License"}
-requires-python = ">=3.8"
+authors = [{name="Defence Science and Technology Laboratory UK", email="oss@dstl.gov.uk"}]
+license = {file = "LICENSE"}
+requires-python = ">=3.8, <3.11"
 dynamic = ["version", "readme"]
 classifiers = [
-    "License :: MIT License",
-    "Development Status :: 4 - Beta",
+    "License :: OSI Approved :: MIT License",
+    "Development Status :: 5 - Production/Stable",
    "Operating System :: Microsoft :: Windows",
    "Operating System :: MacOS",
    "Operating System :: POSIX :: Linux",
@@ -20,17 +20,24 @@ classifiers = [
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3 :: Only",
 ]

 dependencies = [
    "gym==0.21.0",
+    "jupyterlab==3.6.1",
+    "kaleido==0.2.1",
    "matplotlib==3.7.1",
    "networkx==3.1",
    "numpy==1.23.5",
+    "platformdirs==3.5.1",
+    "plotly==5.15.0",
+    "polars==0.18.4",
    "PyYAML==6.0",
-    "stable-baselines3==1.6.2"
+    "ray[rllib]==2.2.0",
+    "stable-baselines3==1.6.2",
+    "tensorflow==2.12.0",
+    "typer[all]==0.9.0"
 ]

 [tool.setuptools.dynamic]
@@ -45,17 +52,32 @@ license-files = ["LICENSE"]

 [project.optional-dependencies]
 dev = [
-    "setuptools==66",
-    "pytest==7.2.0",
+    "build==0.10.0",
    "flake8==6.0.0",
-    "Sphinx==6.1.3",
    "furo==2023.3.27",
-    "sphinx-code-tabs==0.5.3",
-    "sphinx-copybutton==0.5.2",
-    "pytest-cov==4.0.0",
-    "pytest-flake8==1.1.1",
+    "gputil==1.4.0",
    "pip-licenses==4.3.0",
    "pre-commit==2.20.0",
-    "wheel==0.38.4",
-    "build==0.10.0"
+    "pylatex==1.4.1",
+    "pytest==7.2.0",
+    "pytest-xdist==3.3.1",
+    "pytest-cov==4.0.0",
+    "pytest-flake8==1.1.1",
+    "setuptools==66",
+    "Sphinx==6.1.3",
+    "sphinx-code-tabs==0.5.3",
+    "sphinx-copybutton==0.5.2",
+    "wheel==0.38.4"
 ]
+
+[project.scripts]
+primaite = "primaite.cli:app"
+
+[tool.isort]
+profile = "black"
+line_length = 120
+force_sort_within_sections = "False"
+order_by_type = "False"
+
+[tool.black]
+line-length = 120
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,3 +1,5 @@
 [pytest]
 testpaths =
    tests
+markers =
+    env_config_paths
--- a/setup.py
+++ b/setup.py
@@ -1,14 +1,13 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 from setuptools import setup
-from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
+from wheel.bdist_wheel import bdist_wheel as _bdist_wheel  # noqa


 class bdist_wheel(_bdist_wheel):  # noqa
    def finalize_options(self):  # noqa
        super().finalize_options()
-        # forces whee to be platform and Python version specific
-        # Source: https://stackoverflow.com/a/45150383
-        self.root_is_pure = False
+        # Set to False if you need to build OS and Python specific wheels
+        self.root_is_pure = True  # noqa


 setup(
--- a/src/primaite/VERSION
+++ b/src/primaite/VERSION
@@ -1 +1 @@
-1.2.1
+2.0.0
--- a/src/primaite/init.py
+++ b/src/primaite/init.py
@@ -0,0 +1,207 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+import logging
+import logging.config
+import shutil
+import sys
+from bisect import bisect
+from logging import Formatter, Logger, LogRecord, StreamHandler
+from logging.handlers import RotatingFileHandler
+from pathlib import Path
+from typing import Any, Dict, Final, List
+
+import pkg_resources
+import yaml
+from platformdirs import PlatformDirs
+
+with open(Path(__file__).parent.resolve() / "VERSION", "r") as file:
+    __version__ = file.readline().strip()
+
+
+class _PrimaitePaths:
+    """
+    A Primaite paths class that leverages PlatformDirs.
+
+    The PlatformDirs appname is 'primaite' and the version is ``primaite.__version__``.
+    """
+
+    def __init__(self):
+        self._dirs: Final[PlatformDirs] = PlatformDirs(appname="primaite", version=__version__)
+
+    def _get_dirs_properties(self) -> List[str]:
+        class_items = self.__class__.__dict__.items()
+        return [k for k, v in class_items if isinstance(v, property)]
+
+    def mkdirs(self):
+        """
+        Creates all Primaite directories.
+
+        Does this by retrieving all properties of the _PrimaitePaths class and accessing each one.
+        """
+        for p in self._get_dirs_properties():
+            getattr(self, p)
+
+    @property
+    def user_home_path(self) -> Path:
+        """The PrimAITE user home path."""
+        path = Path.home() / "primaite" / __version__
+        path.mkdir(exist_ok=True, parents=True)
+        return path
+
+    @property
+    def user_sessions_path(self) -> Path:
+        """The PrimAITE user sessions path."""
+        path = self.user_home_path / "sessions"
+        path.mkdir(exist_ok=True, parents=True)
+        return path
+
+    @property
+    def user_config_path(self) -> Path:
+        """The PrimAITE user config path."""
+        path = self.user_home_path / "config"
+        path.mkdir(exist_ok=True, parents=True)
+        return path
+
+    @property
+    def user_notebooks_path(self) -> Path:
+        """The PrimAITE user notebooks path."""
+        path = self.user_home_path / "notebooks"
+        path.mkdir(exist_ok=True, parents=True)
+        return path
+
+    @property
+    def app_home_path(self) -> Path:
+        """The PrimAITE app home path."""
+        path = self._dirs.user_data_path
+        path.mkdir(exist_ok=True, parents=True)
+        return path
+
+    @property
+    def app_config_dir_path(self) -> Path:
+        """The PrimAITE app config directory path."""
+        path = self._dirs.user_config_path
+        path.mkdir(exist_ok=True, parents=True)
+        return path
+
+    @property
+    def app_config_file_path(self) -> Path:
+        """The PrimAITE app config file path."""
+        return self.app_config_dir_path / "primaite_config.yaml"
+
+    @property
+    def app_log_dir_path(self) -> Path:
+        """The PrimAITE app log directory path."""
+        if sys.platform == "win32":
+            path = self.app_home_path / "logs"
+        else:
+            path = self._dirs.user_log_path
+        path.mkdir(exist_ok=True, parents=True)
+        return path
+
+    @property
+    def app_log_file_path(self) -> Path:
+        """The PrimAITE app log file path."""
+        return self.app_log_dir_path / "primaite.log"
+
+    def __repr__(self):
+        properties_str = ", ".join([f"{p}='{getattr(self, p)}'" for p in self._get_dirs_properties()])
+        return f"{self.__class__.__name__}({properties_str})"
+
+
+PRIMAITE_PATHS: Final[_PrimaitePaths] = _PrimaitePaths()
+
+
+def _host_primaite_config():
+    if not PRIMAITE_PATHS.app_config_file_path.exists():
+        pkg_config_path = Path(pkg_resources.resource_filename("primaite", "setup/_package_data/primaite_config.yaml"))
+        shutil.copy2(pkg_config_path, PRIMAITE_PATHS.app_config_file_path)
+
+
+_host_primaite_config()
+
+
+def _get_primaite_config() -> Dict:
+    config_path = PRIMAITE_PATHS.app_config_file_path
+    if not config_path.exists():
+        config_path = Path(pkg_resources.resource_filename("primaite", "setup/_package_data/primaite_config.yaml"))
+    with open(config_path, "r") as file:
+        primaite_config = yaml.safe_load(file)
+    log_level_map = {
+        "NOTSET": logging.NOTSET,
+        "DEBUG": logging.DEBUG,
+        "INFO": logging.INFO,
+        "WARN": logging.WARN,
+        "ERROR": logging.ERROR,
+        "CRITICAL": logging.CRITICAL,
+    }
+    primaite_config["log_level"] = log_level_map[primaite_config["logging"]["log_level"]]
+    return primaite_config
+
+
+_PRIMAITE_CONFIG = _get_primaite_config()
+
+
+class _LevelFormatter(Formatter):
+    """
+    A custom level-specific formatter.
+
+    Credit to: https://stackoverflow.com/a/68154386
+    """
+
+    def __init__(self, formats: Dict[int, str], **kwargs: Any) -> None:
+        super().__init__()
+
+        if "fmt" in kwargs:
+            raise ValueError("Format string must be passed to level-surrogate formatters, " "not this one")
+
+        self.formats = sorted((level, Formatter(fmt, **kwargs)) for level, fmt in formats.items())
+
+    def format(self, record: LogRecord) -> str:
+        """Overrides ``Formatter.format``."""
+        idx = bisect(self.formats, (record.levelno,), hi=len(self.formats) - 1)
+        level, formatter = self.formats[idx]
+        return formatter.format(record)
+
+
+_LEVEL_FORMATTER: Final[_LevelFormatter] = _LevelFormatter(
+    {
+        logging.DEBUG: _PRIMAITE_CONFIG["logging"]["logger_format"]["DEBUG"],
+        logging.INFO: _PRIMAITE_CONFIG["logging"]["logger_format"]["INFO"],
+        logging.WARNING: _PRIMAITE_CONFIG["logging"]["logger_format"]["WARNING"],
+        logging.ERROR: _PRIMAITE_CONFIG["logging"]["logger_format"]["ERROR"],
+        logging.CRITICAL: _PRIMAITE_CONFIG["logging"]["logger_format"]["CRITICAL"],
+    }
+)
+
+_STREAM_HANDLER: Final[StreamHandler] = StreamHandler()
+
+_FILE_HANDLER: Final[RotatingFileHandler] = RotatingFileHandler(
+    filename=PRIMAITE_PATHS.app_log_file_path,
+    maxBytes=10485760,  # 10MB
+    backupCount=9,  # Max 100MB of logs
+    encoding="utf8",
+)
+_STREAM_HANDLER.setLevel(_PRIMAITE_CONFIG["logging"]["log_level"])
+_FILE_HANDLER.setLevel(_PRIMAITE_CONFIG["logging"]["log_level"])
+
+_LOG_FORMAT_STR: Final[str] = _PRIMAITE_CONFIG["logging"]["logger_format"]
+_STREAM_HANDLER.setFormatter(_LEVEL_FORMATTER)
+_FILE_HANDLER.setFormatter(_LEVEL_FORMATTER)
+
+_LOGGER = logging.getLogger(__name__)
+
+_LOGGER.addHandler(_STREAM_HANDLER)
+_LOGGER.addHandler(_FILE_HANDLER)
+
+
+def getLogger(name: str) -> Logger:  # noqa
+    """
+    Get a PrimAITE logger.
+
+    :param name: The logger name. Use ``__name__``.
+    :return: An instance of :py:class:`logging.Logger` with the PrimAITE
+        logging config.
+    """
+    logger = logging.getLogger(name)
+    logger.setLevel(_PRIMAITE_CONFIG["log_level"])
+
+    return logger
--- a/src/primaite/acl/init.py
+++ b/src/primaite/acl/init.py
@@ -1 +1,2 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Access Control List. Models firewall functionality."""
--- a/src/primaite/acl/access_control_list.py
+++ b/src/primaite/acl/access_control_list.py
@@ -1,48 +1,62 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """A class that implements the access control list implementation for the network."""
+import logging
+from typing import Dict, Final, List, Union

 from primaite.acl.acl_rule import ACLRule
+from primaite.common.enums import RulePermissionType
+
+_LOGGER: Final[logging.Logger] = logging.getLogger(__name__)


 class AccessControlList:
    """Access Control List class."""

-    def __init__(self):
+    def __init__(self, implicit_permission: RulePermissionType, max_acl_rules: int) -> None:
        """Init."""
-        self.acl = {}  # A dictionary of ACL Rules
+        # Implicit ALLOW or DENY firewall spec
+        self.acl_implicit_permission = implicit_permission
+        # Implicit rule in ACL list
+        if self.acl_implicit_permission == RulePermissionType.DENY:
+            self.acl_implicit_rule = ACLRule(RulePermissionType.DENY, "ANY", "ANY", "ANY", "ANY")
+        elif self.acl_implicit_permission == RulePermissionType.ALLOW:
+            self.acl_implicit_rule = ACLRule(RulePermissionType.ALLOW, "ANY", "ANY", "ANY", "ANY")
+        else:
+            raise ValueError(f"implicit permission must be ALLOW or DENY, got {self.acl_implicit_permission}")

-    def check_address_match(self, _rule, _source_ip_address, _dest_ip_address):
-        """
-        Checks for IP address matches.
+        # Maximum number of ACL Rules in ACL
+        self.max_acl_rules: int = max_acl_rules
+        # A list of ACL Rules
+        self._acl: List[Union[ACLRule, None]] = [None] * (self.max_acl_rules - 1)

-        Args:
-            _rule: The rule being checked
-            _source_ip_address: the source IP address to compare
-            _dest_ip_address: the destination IP address to compare
+    @property
+    def acl(self) -> List[Union[ACLRule, None]]:
+        """Public access method for private _acl."""
+        return self._acl + [self.acl_implicit_rule]

-        Returns:
-             True if match; False otherwise.
+    def check_address_match(self, _rule: ACLRule, _source_ip_address: str, _dest_ip_address: str) -> bool:
+        """Checks for IP address matches.
+
+        :param _rule: The rule object to check
+        :type _rule: ACLRule
+        :param _source_ip_address: Source IP address to compare
+        :type _source_ip_address: str
+        :param _dest_ip_address: Destination IP address to compare
+        :type _dest_ip_address: str
+        :return: True if there is a match, otherwise False.
+        :rtype: bool
        """
        if (
-            (
-                _rule.get_source_ip() == _source_ip_address
-                and _rule.get_dest_ip() == _dest_ip_address
-            )
-            or (
-                _rule.get_source_ip() == "ANY"
-                and _rule.get_dest_ip() == _dest_ip_address
-            )
-            or (
-                _rule.get_source_ip() == _source_ip_address
-                and _rule.get_dest_ip() == "ANY"
-            )
+            (_rule.get_source_ip() == _source_ip_address and _rule.get_dest_ip() == _dest_ip_address)
+            or (_rule.get_source_ip() == "ANY" and _rule.get_dest_ip() == _dest_ip_address)
+            or (_rule.get_source_ip() == _source_ip_address and _rule.get_dest_ip() == "ANY")
            or (_rule.get_source_ip() == "ANY" and _rule.get_dest_ip() == "ANY")
        ):
            return True
        else:
            return False

-    def is_blocked(self, _source_ip_address, _dest_ip_address, _protocol, _port):
+    def is_blocked(self, _source_ip_address: str, _dest_ip_address: str, _protocol: str, _port: str) -> bool:
        """
        Checks for rules that block a protocol / port.

@@ -55,27 +69,30 @@ class AccessControlList:
        Returns:
             Indicates block if all conditions are satisfied.
        """
-        for rule_key, rule_value in self.acl.items():
-            if self.check_address_match(
-                rule_value, _source_ip_address, _dest_ip_address
-            ):
-                if (
-                    rule_value.get_protocol() == _protocol
-                    or rule_value.get_protocol() == "ANY"
-                ) and (
-                    str(rule_value.get_port()) == str(_port)
-                    or rule_value.get_port() == "ANY"
-                ):
-                    # There's a matching rule. Get the permission
-                    if rule_value.get_permission() == "DENY":
-                        return True
-                    elif rule_value.get_permission() == "ALLOW":
-                        return False
+        for rule in self.acl:
+            if isinstance(rule, ACLRule):
+                if self.check_address_match(rule, _source_ip_address, _dest_ip_address):
+                    if (rule.get_protocol() == _protocol or rule.get_protocol() == "ANY") and (
+                        str(rule.get_port()) == str(_port) or rule.get_port() == "ANY"
+                    ):
+                        # There's a matching rule. Get the permission
+                        if rule.get_permission() == RulePermissionType.DENY:
+                            return True
+                        elif rule.get_permission() == RulePermissionType.ALLOW:
+                            return False

        # If there has been no rule to allow the IER through, it will return a blocked signal by default
        return True

-    def add_rule(self, _permission, _source_ip, _dest_ip, _protocol, _port):
+    def add_rule(
+        self,
+        _permission: RulePermissionType,
+        _source_ip: str,
+        _dest_ip: str,
+        _protocol: str,
+        _port: str,
+        _position: str,
+    ) -> None:
        """
        Adds a new rule.

@@ -85,12 +102,36 @@ class AccessControlList:
            _dest_ip: the destination IP address
            _protocol: the protocol
            _port: the port
+            _position: zero-based index in the ACL list at which to insert the rule (0 to max_acl_rules - 2)
        """
-        new_rule = ACLRule(_permission, _source_ip, _dest_ip, _protocol, str(_port))
-        hash_value = hash(new_rule)
-        self.acl[hash_value] = new_rule
+        try:
+            position_index = int(_position)
+        except TypeError:
+            _LOGGER.info(f"Position {_position} could not be converted to integer.")
+            return

-    def remove_rule(self, _permission, _source_ip, _dest_ip, _protocol, _port):
+        new_rule = ACLRule(_permission, _source_ip, _dest_ip, _protocol, str(_port))
+        # Checks position is in correct range
+        if self.max_acl_rules - 1 > position_index > -1:
+            try:
+                _LOGGER.info(f"Position {position_index} is valid.")
+                # Check that the agent will not overwrite an existing rule in the ACL list
+                if self._acl[position_index] is None:
+                    _LOGGER.info(f"Inserting rule {new_rule} at position {position_index}")
+                    # Adds rule
+                    self._acl[position_index] = new_rule
+                else:
+                    # Cannot overwrite it
+                    _LOGGER.info(f"Error: inserting rule at non-empty position {position_index}")
+                    return
+            except Exception:
+                _LOGGER.info(f"New Rule could NOT be added to list at position {position_index}.")
+        else:
+            _LOGGER.info(f"Position {position_index} is an invalid/overwrites implicit firewall rule")
+
+    def remove_rule(
+        self, _permission: RulePermissionType, _source_ip: str, _dest_ip: str, _protocol: str, _port: str
+    ) -> None:
        """
        Removes a rule.

@@ -101,19 +142,21 @@ class AccessControlList:
            _protocol: the protocol
            _port: the port
        """
-        rule = ACLRule(_permission, _source_ip, _dest_ip, _protocol, str(_port))
-        hash_value = hash(rule)
-        # There will not always be something 'popable' since the agent will be trying random things
-        try:
-            self.acl.pop(hash_value)
-        except Exception:
-            return
+        rule_to_delete = ACLRule(_permission, _source_ip, _dest_ip, _protocol, str(_port))
+        delete_rule_hash = hash(rule_to_delete)

-    def remove_all_rules(self):
+        for index in range(0, len(self._acl)):
+            if isinstance(self._acl[index], ACLRule) and hash(self._acl[index]) == delete_rule_hash:
+                self._acl[index] = None
+
+    def remove_all_rules(self) -> None:
        """Removes all rules."""
-        self.acl.clear()
+        for i in range(len(self._acl)):
+            self._acl[i] = None

-    def get_dictionary_hash(self, _permission, _source_ip, _dest_ip, _protocol, _port):
+    def get_dictionary_hash(
+        self, _permission: RulePermissionType, _source_ip: str, _dest_ip: str, _protocol: str, _port: str
+    ) -> int:
        """
        Produces a hash value for a rule.

@@ -130,3 +173,26 @@ class AccessControlList:
        rule = ACLRule(_permission, _source_ip, _dest_ip, _protocol, str(_port))
        hash_value = hash(rule)
        return hash_value
+
+    def get_relevant_rules(
+        self, _source_ip_address: str, _dest_ip_address: str, _protocol: str, _port: str
+    ) -> Dict[int, ACLRule]:
+        """Get all ACL rules that relate to the given arguments.
+
+        :param _source_ip_address: the source IP address to check
+        :param _dest_ip_address: the destination IP address to check
+        :param _protocol: the protocol to check
+        :param _port: the port to check
+        :return: Dictionary of all ACL rules that relate to the given arguments
+        :rtype: Dict[int, ACLRule]
+        """
+        relevant_rules = {}
+        for rule in self.acl:
+            if self.check_address_match(rule, _source_ip_address, _dest_ip_address):
+                if (rule.get_protocol() == _protocol or rule.get_protocol() == "ANY" or _protocol == "ANY") and (
+                    str(rule.get_port()) == str(_port) or rule.get_port() == "ANY" or str(_port) == "ANY"
+                ):
+                    # There's a matching rule.
+                    relevant_rules[self._acl.index(rule)] = rule
+
+        return relevant_rules
--- a/src/primaite/acl/acl_rule.py
+++ b/src/primaite/acl/acl_rule.py
@@ -1,28 +1,30 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """A class that implements an access control list rule."""
+from primaite.common.enums import RulePermissionType


 class ACLRule:
    """Access Control List Rule class."""

-    def __init__(self, _permission, _source_ip, _dest_ip, _protocol, _port):
+    def __init__(
+        self, _permission: RulePermissionType, _source_ip: str, _dest_ip: str, _protocol: str, _port: str
+    ) -> None:
        """
-        Init.
+        Initialise an ACL Rule.

-        Args:
-            _permission: The permission (ALLOW or DENY)
-            _source_ip: The source IP address
-            _dest_ip: The destination IP address
-            _protocol: The rule protocol
-            _port: The rule port
+        :param _permission: The permission (ALLOW or DENY)
+        :param _source_ip: The source IP address
+        :param _dest_ip: The destination IP address
+        :param _protocol: The rule protocol
+        :param _port: The rule port
        """
-        self.permission = _permission
-        self.source_ip = _source_ip
-        self.dest_ip = _dest_ip
-        self.protocol = _protocol
-        self.port = _port
+        self.permission: RulePermissionType = _permission
+        self.source_ip: str = _source_ip
+        self.dest_ip: str = _dest_ip
+        self.protocol: str = _protocol
+        self.port: str = _port

-    def __hash__(self):
+    def __hash__(self) -> int:
        """
        Override the hash function.

@@ -30,10 +32,16 @@ class ACLRule:
             Returns hash of core parameters.
        """
        return hash(
-            (self.permission, self.source_ip, self.dest_ip, self.protocol, self.port)
+            (
+                self.permission,
+                self.source_ip,
+                self.dest_ip,
+                self.protocol,
+                self.port,
+            )
        )

-    def get_permission(self):
+    def get_permission(self) -> str:
        """
        Gets the permission attribute.

@@ -42,7 +50,7 @@ class ACLRule:
        """
        return self.permission

-    def get_source_ip(self):
+    def get_source_ip(self) -> str:
        """
        Gets the source IP address attribute.

@@ -51,7 +59,7 @@ class ACLRule:
        """
        return self.source_ip

-    def get_dest_ip(self):
+    def get_dest_ip(self) -> str:
        """
        Gets the desintation IP address attribute.

@@ -60,7 +68,7 @@ class ACLRule:
        """
        return self.dest_ip

-    def get_protocol(self):
+    def get_protocol(self) -> str:
        """
        Gets the protocol attribute.

@@ -69,7 +77,7 @@ class ACLRule:
        """
        return self.protocol

-    def get_port(self):
+    def get_port(self) -> str:
        """
        Gets the port attribute.

--- a/src/primaite/agents/init.py
+++ b/src/primaite/agents/init.py
@@ -0,0 +1,2 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Common interface between RL agents from different libraries and PrimAITE."""
--- a/src/primaite/agents/agent_abc.py
+++ b/src/primaite/agents/agent_abc.py
@@ -0,0 +1,309 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+from __future__ import annotations
+
+import json
+from abc import ABC, abstractmethod
+from datetime import datetime
+from logging import Logger
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+from uuid import uuid4
+
+import primaite
+from primaite import getLogger, PRIMAITE_PATHS
+from primaite.config import lay_down_config, training_config
+from primaite.config.training_config import TrainingConfig
+from primaite.data_viz.session_plots import plot_av_reward_per_episode
+from primaite.environment.primaite_env import Primaite
+from primaite.utils.session_metadata_parser import parse_session_metadata
+
+_LOGGER: Logger = getLogger(__name__)
+
+
+def get_session_path(session_timestamp: datetime) -> Path:
+    """
+    Get the directory path the session will output to.
+
+    This is set in the format of:
+        ~/primaite/2.0.0/sessions/<yyyy-mm-dd>/<yyyy-mm-dd>_<hh-mm-ss>.
+
+    :param session_timestamp: This is the datetime that the session started.
+    :return: The session directory path.
+    """
+    date_dir = session_timestamp.strftime("%Y-%m-%d")
+    session_path = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
+    session_path = PRIMAITE_PATHS.user_sessions_path / date_dir / session_path
+    session_path.mkdir(exist_ok=True, parents=True)
+
+    return session_path
+
+
+class AgentSessionABC(ABC):
+    """
+    An ABC that manages training and/or evaluation of agents in PrimAITE.
+
+    This class cannot be directly instantiated; it must be subclassed with all of its abstract methods
+    implemented.
+    """
+
+    @abstractmethod
+    def __init__(
+        self,
+        training_config_path: Optional[Union[str, Path]] = None,
+        lay_down_config_path: Optional[Union[str, Path]] = None,
+        session_path: Optional[Union[str, Path]] = None,
+    ) -> None:
+        """
+        Initialise an agent session from config files, or load a previous session.
+
+        If training configuration and laydown configuration are provided with a session path,
+        the session path will be used.
+
+        :param training_config_path: YAML file containing configurable items defined in
+            `primaite.config.training_config.TrainingConfig`
+        :type training_config_path: Optional[Union[str, Path]]
+        :param lay_down_config_path: YAML file containing configurable items for generating network laydown.
+        :type lay_down_config_path: Optional[Union[str, Path]]
+        :param session_path: directory path of the session to load
+        """
+        # initialise variables
+        self._env: Primaite
+        self._agent = None
+        self._can_learn: bool = False
+        self._can_evaluate: bool = False
+        self.is_eval = False
+
+        self.session_timestamp: datetime = datetime.now()
+
+        # convert session to path
+        if session_path is not None:
+            if not isinstance(session_path, Path):
+                session_path = Path(session_path)
+
+            # if a session path is provided, load it
+            if not session_path.exists():
+                raise Exception(f"Session could not be loaded. Path does not exist: {session_path}")
+
+            # load session
+            self.load(session_path)
+        else:
+            # set training config path
+            if not isinstance(training_config_path, Path):
+                training_config_path = Path(training_config_path)
+            self._training_config_path: Union[Path, str] = training_config_path
+            self._training_config: TrainingConfig = training_config.load(self._training_config_path)
+
+            if not isinstance(lay_down_config_path, Path):
+                lay_down_config_path = Path(lay_down_config_path)
+            self._lay_down_config_path: Union[Path, str] = lay_down_config_path
+            self._lay_down_config: Dict = lay_down_config.load(self._lay_down_config_path)
+            self.sb3_output_verbose_level = self._training_config.sb3_output_verbose_level
+
+            # set random UUID for session
+            self._uuid = str(uuid4())
+            "The session timestamp"
+            self.session_path = get_session_path(self.session_timestamp)
+            "The Session path"
+
+    @property
+    def timestamp_str(self) -> str:
+        """The session timestamp as a string."""
+        return self.session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
+
+    @property
+    def learning_path(self) -> Path:
+        """The learning outputs path."""
+        path = self.session_path / "learning"
+        path.mkdir(exist_ok=True, parents=True)
+        return path
+
+    @property
+    def evaluation_path(self) -> Path:
+        """The evaluation outputs path."""
+        path = self.session_path / "evaluation"
+        path.mkdir(exist_ok=True, parents=True)
+        return path
+
+    @property
+    def checkpoints_path(self) -> Path:
+        """The Session checkpoints path."""
+        path = self.learning_path / "checkpoints"
+        path.mkdir(exist_ok=True, parents=True)
+        return path
+
+    @property
+    def uuid(self) -> str:
+        """The Agent Session UUID."""
+        return self._uuid
+
+    def _write_session_metadata_file(self) -> None:
+        """
+        Write the ``session_metadata.json`` file.
+
+        Creates a ``session_metadata.json`` in the ``session_path`` directory
+        and adds the following key/value pairs:
+
+        - uuid: The UUID assigned to the session upon instantiation.
+        - start_datetime: The date & time the session started in iso format.
+        - end_datetime: NULL.
+        - learning: ``total_episodes`` and ``total_time_steps``, both NULL.
+        - evaluation: ``total_episodes`` and ``total_time_steps``, both NULL.
+        - env:
+            - training_config:
+                - All training config items
+            - lay_down_config:
+                - All lay down config items
+
+        """
+        metadata_dict = {
+            "uuid": self.uuid,
+            "start_datetime": self.session_timestamp.isoformat(),
+            "end_datetime": None,
+            "learning": {"total_episodes": None, "total_time_steps": None},
+            "evaluation": {"total_episodes": None, "total_time_steps": None},
+            "env": {
+                "training_config": self._training_config.to_dict(json_serializable=True),
+                "lay_down_config": self._lay_down_config,
+            },
+        }
+        filepath = self.session_path / "session_metadata.json"
+        _LOGGER.debug(f"Writing Session Metadata file: {filepath}")
+        with open(filepath, "w") as file:
+            json.dump(metadata_dict, file)
+            _LOGGER.debug("Finished writing session metadata file")
+
+    def _update_session_metadata_file(self) -> None:
+        """
+        Update the ``session_metadata.json`` file.
+
+        Updates the ``session_metadata.json`` in the ``session_path`` directory
+        with the following key/value pairs:
+
+        - end_datetime: The date & time the session ended in iso format.
+        - total_episodes: The episode count, written under ``evaluation`` if ``is_eval`` else ``learning``.
+        - total_time_steps: The time step count, written under ``evaluation`` if ``is_eval`` else ``learning``.
+        """
+        with open(self.session_path / "session_metadata.json", "r") as file:
+            metadata_dict = json.load(file)
+
+        metadata_dict["end_datetime"] = datetime.now().isoformat()
+        if not self.is_eval:
+            metadata_dict["learning"]["total_episodes"] = self._env.actual_episode_count  # noqa
+            metadata_dict["learning"]["total_time_steps"] = self._env.total_step_count  # noqa
+        else:
+            metadata_dict["evaluation"]["total_episodes"] = self._env.actual_episode_count  # noqa
+            metadata_dict["evaluation"]["total_time_steps"] = self._env.total_step_count  # noqa
+
+        filepath = self.session_path / "session_metadata.json"
+        _LOGGER.debug(f"Updating Session Metadata file: {filepath}")
+        with open(filepath, "w") as file:
+            json.dump(metadata_dict, file)
+            _LOGGER.debug("Finished updating session metadata file")
+
+    @abstractmethod
+    def _setup(self) -> None:
+        """
+        Run the set-up steps shared by agent sessions.
+
+        Logs the welcome banner and the session output directory, writes the
+        initial session metadata file, then flags the session as able to learn
+        but not yet able to evaluate.
+        """
+        banner = f"Welcome to the Primary-level AI Training Environment (PrimAITE) (version: {primaite.__version__})"
+        _LOGGER.info(banner)
+        _LOGGER.info(f"The output directory for this session is: {self.session_path}")
+        self._write_session_metadata_file()
+        self._can_learn = True
+        self._can_evaluate = False
+
+    @abstractmethod
+    def _save_checkpoint(self) -> None:
+        """Hook for saving an agent checkpoint; this base declaration defines no behaviour."""
+
+    @abstractmethod
+    def learn(
+        self,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Train the agent.
+
+        This base implementation only performs the post-learning bookkeeping, and does nothing unless
+        ``self._can_learn`` is set: it marks the session as not being in evaluation, updates the session
+        metadata file and flags the session as ready to be evaluated.
+
+        :param kwargs: Any agent-specific key-word args to be passed.
+        """
+        if self._can_learn:
+            _LOGGER.info("Finished learning")
+            _LOGGER.debug("Writing transactions")
+            # The metadata update files its totals under "learning" or "evaluation" depending on is_eval, so
+            # the flag must be cleared before the update runs; otherwise a learn() that follows an evaluate()
+            # would record its totals under "evaluation".
+            self.is_eval = False
+            self._update_session_metadata_file()
+            self._can_evaluate = True
+
+    @abstractmethod
+    def evaluate(
+        self,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Evaluate the agent.
+
+        This base implementation only performs the post-evaluation bookkeeping, and does nothing unless
+        ``self._can_evaluate`` is set: it marks the session as being in evaluation, updates the session
+        metadata file and plots the average reward per episode for the evaluation session.
+
+        :param kwargs: Any agent-specific key-word args to be passed.
+        """
+        if self._can_evaluate:
+            # The metadata update files its totals under "learning" or "evaluation" depending on is_eval, so
+            # the flag must be set before the update runs; otherwise the evaluation totals could be recorded
+            # under "learning".
+            self.is_eval = True
+            self._update_session_metadata_file()
+            self._plot_av_reward_per_episode(learning_session=False)
+            _LOGGER.info("Finished evaluation")
+
+    @abstractmethod
+    def _get_latest_checkpoint(self) -> None:
+        """Hook for retrieving the latest agent checkpoint; this base declaration defines no behaviour."""
+
+    def load(self, path: Union[str, Path]) -> None:
+        """
+        Load an agent session from file.
+
+        Parses the session metadata found via ``path``, reloads the training config and lay down config it
+        refers to, and restores the session UUID and session path on this instance.
+
+        :param path: Location of the session to load; it becomes this instance's ``session_path``.
+        :type path: Union[str, Path]
+        """
+        md_dict, training_config_path, laydown_config_path = parse_session_metadata(path)
+
+        # set training config path
+        self._training_config_path: Union[Path, str] = training_config_path
+        self._training_config: TrainingConfig = training_config.load(self._training_config_path)
+        self._lay_down_config_path: Union[Path, str] = laydown_config_path
+        self._lay_down_config: Dict = lay_down_config.load(self._lay_down_config_path)
+        self.sb3_output_verbose_level = self._training_config.sb3_output_verbose_level
+
+        # restore the UUID recorded in the session metadata (it is not freshly generated here)
+        self._uuid = md_dict["uuid"]
+
+        # set the session path; coerced to a Path because other methods of this class join onto it with the
+        # ``/`` operator, which a plain str does not support
+        self.session_path = Path(path)
+
+    @property
+    def _saved_agent_path(self) -> Path:
+        """The ``.zip`` file path inside ``learning_path``, named ``<agent_framework>_<agent_identifier>.zip``."""
+        config = self._training_config
+        file_name = f"{config.agent_framework}_{config.agent_identifier}.zip"
+        return self.learning_path / file_name
+
+    @abstractmethod
+    def save(self) -> None:
+        """Save the agent; this base declaration defines no behaviour."""
+
+    @abstractmethod
+    def export(self) -> None:
+        """Export the agent to transportable file format; this base declaration defines no behaviour."""
+
+    def close(self) -> None:
+        """Close the environment's ``episode_av_reward_writer`` and then its ``transaction_writer``."""
+        env = self._env
+        env.episode_av_reward_writer.close()  # noqa
+        env.transaction_writer.close()  # noqa
+
+    def _plot_av_reward_per_episode(self, learning_session: bool = True) -> None:
+        """
+        Plot the average reward per episode and save the plot as a PNG image.
+
+        The ``average_reward_per_episode_<timestamp>.csv`` file is read from, and the matching ``.png`` image
+        is written to, ``learning_path`` or ``evaluation_path`` depending on ``learning_session``.
+
+        :param learning_session: Plot the learning session if True, otherwise the evaluation session.
+        :type learning_session: bool
+        """
+        title = f"PrimAITE Session {self.timestamp_str} "
+        subtitle = str(self._training_config)
+        csv_file = f"average_reward_per_episode_{self.timestamp_str}.csv"
+        image_file = f"average_reward_per_episode_{self.timestamp_str}.png"
+        if learning_session:
+            title += "(Learning)"
+            path = self.learning_path / csv_file
+            image_path = self.learning_path / image_file
+        else:
+            title += "(Evaluation)"
+            path = self.evaluation_path / csv_file
+            image_path = self.evaluation_path / image_file
+
+        fig = plot_av_reward_per_episode(path, title, subtitle)
+        fig.write_image(image_path)
+        # Report where the image was written, not the CSV file it was generated from.
+        _LOGGER.debug(f"Saved average rewards per episode plot to: {image_path}")
--- a/src/primaite/agents/hardcoded_abc.py
+++ b/src/primaite/agents/hardcoded_abc.py
@@ -0,0 +1,118 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+import time
+from abc import abstractmethod
+from pathlib import Path
+from typing import Any, Optional, Union
+
+import numpy as np
+
+from primaite import getLogger
+from primaite.agents.agent_abc import AgentSessionABC
+from primaite.environment.primaite_env import Primaite
+
+_LOGGER = getLogger(__name__)
+
+
+class HardCodedAgentSessionABC(AgentSessionABC):
+    """
+    An Agent Session ABC for evaluating deterministic agents.
+
+    This class cannot be directly instantiated and must be inherited from, with all abstract methods
+    implemented.
+    """
+
+    def __init__(
+        self,
+        training_config_path: Optional[Union[str, Path]] = "",
+        lay_down_config_path: Optional[Union[str, Path]] = "",
+        session_path: Optional[Union[str, Path]] = None,
+    ) -> None:
+        """
+        Initialise a hardcoded agent session.
+
+        :param training_config_path: YAML file containing configurable items defined in
+            `primaite.config.training_config.TrainingConfig`
+        :type training_config_path: Union[path, str]
+        :param lay_down_config_path: YAML file containing configurable items for generating network laydown.
+        :type lay_down_config_path: Union[path, str]
+        :param session_path: Passed through unchanged to the parent class initialiser.
+        :type session_path: Optional[Union[path, str]]
+        """
+        super().__init__(training_config_path, lay_down_config_path, session_path)
+        self._setup()
+
+    def _setup(self) -> None:
+        """Create the Primaite environment, run the parent set-up, then mark the session as evaluate-only."""
+        self._env: Primaite = Primaite(
+            training_config_path=self._training_config_path,
+            lay_down_config_path=self._lay_down_config_path,
+            session_path=self.session_path,
+            timestamp_str=self.timestamp_str,
+        )
+        super()._setup()
+        # Set after the parent set-up so that these values are the ones left in place.
+        self._can_learn = False
+        self._can_evaluate = True
+
+    def _save_checkpoint(self) -> None:
+        """Do nothing; no checkpoint behaviour is implemented for hardcoded agents."""
+        pass
+
+    def _get_latest_checkpoint(self) -> None:
+        """Do nothing; no checkpoint behaviour is implemented for hardcoded agents."""
+        pass
+
+    def learn(
+        self,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Train the agent.
+
+        Deterministic agents cannot learn, so this only logs a warning.
+
+        :param kwargs: Any agent-specific key-word args to be passed.
+        """
+        _LOGGER.warning("Deterministic agents cannot learn")
+
+    @abstractmethod
+    def _calculate_action(self, obs: np.ndarray) -> int:
+        """
+        Calculate the action to take for the given observation.
+
+        The returned value is passed straight to the environment's ``step`` method by :meth:`evaluate`.
+
+        :param obs: current observation from the gym environment
+        :type obs: np.ndarray
+        :return: The action to take in the environment.
+        :rtype: int
+        """
+        pass
+
+    def evaluate(
+        self,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Evaluate the agent.
+
+        Runs ``num_eval_episodes`` episodes of at most ``num_eval_steps`` steps each, choosing every action
+        with :meth:`_calculate_action` and sleeping for ``time_delay`` milliseconds between steps, then
+        closes the environment.
+
+        :param kwargs: Any agent-specific key-word args to be passed.
+        """
+        self._env.set_as_eval()  # noqa
+        self.is_eval = True
+
+        time_steps = self._training_config.num_eval_steps
+        episodes = self._training_config.num_eval_episodes
+
+        # Reset env and collect initial observation
+        obs = self._env.reset()
+        for _ in range(episodes):
+            for _ in range(time_steps):
+                # Calculate action
+                action = self._calculate_action(obs)
+
+                # Perform the step
+                obs, _reward, done, _info = self._env.step(action)
+
+                if done:
+                    break
+
+                # Introduce a delay between steps (time_delay is divided by 1000 to give seconds)
+                time.sleep(self._training_config.time_delay / 1000)
+            # Reset at the end of every episode, which includes one reset after the final episode
+            obs = self._env.reset()
+        self._env.close()
+
+    @classmethod
+    def load(cls, path: Optional[Union[str, Path]] = None) -> None:
+        """
+        Load an agent from file.
+
+        Deterministic agents cannot be loaded, so this only logs a warning.
+
+        :param path: Ignored.
+        :type path: Optional[Union[str, Path]]
+        """
+        _LOGGER.warning("Deterministic agents cannot be loaded")
+
+    def save(self) -> None:
+        """Save the agent (not supported: only logs a warning)."""
+        _LOGGER.warning("Deterministic agents cannot be saved")
+
+    def export(self) -> None:
+        """Export the agent to transportable file format (not supported: only logs a warning)."""
+        _LOGGER.warning("Deterministic agents cannot be exported")
--- a/src/primaite/agents/hardcoded_acl.py
+++ b/src/primaite/agents/hardcoded_acl.py
@@ -0,0 +1,515 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+from typing import Dict, List, Union
+
+import numpy as np
+
+from primaite.acl.access_control_list import AccessControlList
+from primaite.acl.acl_rule import ACLRule
+from primaite.agents.hardcoded_abc import HardCodedAgentSessionABC
+from primaite.agents.utils import (
+    get_new_action,
+    get_node_of_ip,
+    transform_action_acl_enum,
+    transform_change_obs_readable,
+)
+from primaite.common.custom_typing import NodeUnion
+from primaite.common.enums import HardCodedAgentView
+from primaite.nodes.active_node import ActiveNode
+from primaite.nodes.service_node import ServiceNode
+from primaite.pol.ier import IER
+
+
+class HardCodedACLAgent(HardCodedAgentSessionABC):
+    """An Agent Session class that implements a deterministic ACL agent."""
+
+    def _calculate_action(self, obs: np.ndarray) -> int:
+        """
+        Choose the action for this step using the configured hard coded agent view.
+
+        :param obs: current observation from the gym environment
+        :type obs: np.ndarray
+        :return: The action produced by the basic view or the full view calculation.
+        :rtype: int
+        """
+        if self._training_config.hard_coded_agent_view != HardCodedAgentView.BASIC:
+            # full view action using observation space, action
+            # history and reward feedback
+            return self._calculate_action_full_view(obs)
+
+        # Basic view action using only the current observation
+        return self._calculate_action_basic_view(obs)
+
+    def get_blocked_green_iers(
+        self, green_iers: Dict[str, IER], acl: AccessControlList, nodes: Dict[str, NodeUnion]
+    ) -> Dict[str, IER]:
+        """Filter green IERs down to those the ACL currently blocks.
+
+        :param green_iers: Green IERs to check for being blocked
+        :type green_iers: Dict[str, IER]
+        :param acl: Firewall rules
+        :type acl: AccessControlList
+        :param nodes: Nodes in the network
+        :type nodes: Dict[str,NodeUnion]
+        :return: Same as `green_iers` input dict, but filtered to only contain the blocked ones.
+        :rtype: Dict[str, IER]
+        """
+
+        def _is_blocked(ier: IER) -> bool:
+            # Can be blocked by an ACL or by default (no allow rule exists)
+            source_address = nodes[ier.get_source_node_id()].ip_address
+            dest_address = nodes[ier.get_dest_node_id()].ip_address
+            return acl.is_blocked(source_address, dest_address, ier.get_protocol(), ier.get_port())
+
+        return {ier_id: ier for ier_id, ier in green_iers.items() if _is_blocked(ier)}
+
+    def get_matching_acl_rules_for_ier(
+        self, ier: IER, acl: AccessControlList, nodes: Dict[str, NodeUnion]
+    ) -> Dict[int, ACLRule]:
+        """Get the ACL rules which are relevant to an IER.
+
+        :param ier: Information Exchange Request to query against the ACL list
+        :type ier: IER
+        :param acl: Firewall rules
+        :type acl: AccessControlList
+        :param nodes: Nodes in the network
+        :type nodes: Dict[str,NodeUnion]
+        :return: The result of ``acl.get_relevant_rules`` for the IER's source address, destination address,
+            protocol and port.
+        :rtype: Dict[int, ACLRule]
+        """
+        source_address = nodes[ier.get_source_node_id()].ip_address
+        dest_address = nodes[ier.get_dest_node_id()].ip_address
+        # protocol is a name, e.g. 'TCP'
+        return acl.get_relevant_rules(source_address, dest_address, ier.get_protocol(), ier.get_port())
+
+    def get_blocking_acl_rules_for_ier(
+        self, ier: IER, acl: AccessControlList, nodes: Dict[str, NodeUnion]
+    ) -> Dict[int, ACLRule]:
+        """
+        Get the DENY rules among the ACL rules relevant to an IER.
+
+        .. warning::
+            Can return empty dict but IER can still be blocked by default
+            (No ALLOW rule, therefore blocked).
+
+        :param ier: Information Exchange Request to query against the ACL list
+        :type ier: IER
+        :param acl: Firewall rules
+        :type acl: AccessControlList
+        :param nodes: Nodes in the network
+        :type nodes: Dict[str,NodeUnion]
+        :return: The matching rules whose permission is "DENY", keyed as returned by the ACL lookup.
+        :rtype: Dict[int, ACLRule]
+        """
+        candidates = self.get_matching_acl_rules_for_ier(ier, acl, nodes)
+        return {key: rule for key, rule in candidates.items() if rule.get_permission() == "DENY"}
+
+    def get_allow_acl_rules_for_ier(
+        self, ier: IER, acl: AccessControlList, nodes: Dict[str, NodeUnion]
+    ) -> Dict[int, ACLRule]:
+        """Get the ALLOW rules among the ACL rules relevant to an IER.
+
+        :param ier: Information Exchange Request to query against the ACL list
+        :type ier: IER
+        :param acl: Firewall rules
+        :type acl: AccessControlList
+        :param nodes: Nodes in the network
+        :type nodes: Dict[str,NodeUnion]
+        :return: The matching rules whose permission is "ALLOW", keyed as returned by the ACL lookup.
+        :rtype: Dict[int, ACLRule]
+        """
+        candidates = self.get_matching_acl_rules_for_ier(ier, acl, nodes)
+        return {key: rule for key, rule in candidates.items() if rule.get_permission() == "ALLOW"}
+
+    def get_matching_acl_rules(
+        self,
+        source_node_id: Union[int, str],
+        dest_node_id: Union[int, str],
+        protocol: Union[int, str],
+        port: str,
+        acl: AccessControlList,
+        nodes: Dict[str, Union[ServiceNode, ActiveNode]],
+        services_list: List[str],
+    ) -> Dict[int, ACLRule]:
+        """Filter ACL rules to only those which are relevant to the specified nodes.
+
+        :param source_node_id: Source node id, or "ANY". Converted to ``str`` before being looked up in ``nodes``.
+        :type source_node_id: Union[int, str]
+        :param dest_node_id: Destination node id, or "ANY". Converted to ``str`` before being looked up in
+            ``nodes``.
+        :type dest_node_id: Union[int, str]
+        :param protocol: 1-based index into ``services_list``, or "ANY".
+        :type protocol: Union[int, str]
+        :param port: Network port
+        :type port: str
+        :param acl: Access Control list which will be filtered
+        :type acl: AccessControlList
+        :param nodes: The environment's node directory.
+        :type nodes: Dict[str, Union[ServiceNode, ActiveNode]]
+        :param services_list: List of services registered for the environment.
+        :type services_list: List[str]
+        :return: Filtered version of 'acl'
+        :rtype: Dict[int, ACLRule]
+        """
+        # "ANY" is passed through to the ACL lookup as-is; anything else is resolved to the node's IP address.
+        if source_node_id != "ANY":
+            source_node_address = nodes[str(source_node_id)].ip_address
+        else:
+            source_node_address = source_node_id
+
+        if dest_node_id != "ANY":
+            dest_node_address = nodes[str(dest_node_id)].ip_address
+        else:
+            dest_node_address = dest_node_id
+
+        if protocol != "ANY":
+            # Convert the protocol index to its service name; -1 as ANY is not an entry in the list of services
+            protocol = services_list[protocol - 1]
+
+        matching_rules = acl.get_relevant_rules(source_node_address, dest_node_address, protocol, port)
+        return matching_rules
+
+    def get_allow_acl_rules(
+        self,
+        source_node_id: int,
+        dest_node_id: str,
+        protocol: int,
+        port: str,
+        acl: AccessControlList,
+        nodes: Dict[str, NodeUnion],
+        services_list: List[str],
+    ) -> Dict[int, ACLRule]:
+        """List ALLOW rules relating to specified nodes.
+
+        :param source_node_id: Source node id
+        :type source_node_id: int
+        :param dest_node_id: Destination node
+        :type dest_node_id: str
+        :param protocol: Network protocol
+        :type protocol: int
+        :param port: Port
+        :type port: str
+        :param acl: Firewall ruleset which is applied to the network
+        :type acl: AccessControlList
+        :param nodes: The simulation's node store
+        :type nodes: Dict[str, NodeUnion]
+        :param services_list: Services list
+        :type services_list: List[str]
+        :return: The rules returned by :meth:`get_matching_acl_rules` for these arguments whose permission is
+            "ALLOW"
+        :rtype: Dict[int, ACLRule]
+        """
+        candidates = self.get_matching_acl_rules(
+            source_node_id, dest_node_id, protocol, port, acl, nodes, services_list
+        )
+        return {key: rule for key, rule in candidates.items() if rule.get_permission() == "ALLOW"}
+
+    def get_deny_acl_rules(
+        self,
+        source_node_id: int,
+        dest_node_id: str,
+        protocol: int,
+        port: str,
+        acl: AccessControlList,
+        nodes: Dict[str, NodeUnion],
+        services_list: List[str],
+    ) -> Dict[int, ACLRule]:
+        """List DENY rules relating to specified nodes.
+
+        :param source_node_id: Source node id
+        :type source_node_id: int
+        :param dest_node_id: Destination node
+        :type dest_node_id: str
+        :param protocol: Network protocol
+        :type protocol: int
+        :param port: Port
+        :type port: str
+        :param acl: Firewall ruleset which is applied to the network
+        :type acl: AccessControlList
+        :param nodes: The simulation's node store
+        :type nodes: Dict[str, NodeUnion]
+        :param services_list: Services list
+        :type services_list: List[str]
+        :return: The rules returned by :meth:`get_matching_acl_rules` for these arguments whose permission is
+            "DENY"
+        :rtype: Dict[int, ACLRule]
+        """
+        candidates = self.get_matching_acl_rules(
+            source_node_id, dest_node_id, protocol, port, acl, nodes, services_list
+        )
+        return {key: rule for key, rule in candidates.items() if rule.get_permission() == "DENY"}
+
+    def _calculate_action_full_view(self, obs: np.ndarray) -> int:
+        """
+        Calculate a good acl-based action for the blue agent to take.
+
+        Knowledge of just the observation space is insufficient for a perfect solution, as we need to know:
+
+            - Which ACL rules already exist, - otherwise:
+                 - The agent would permanently get stuck in a loop of performing the same action over and over.
+                 (best action is to block something, but it's already blocked but doesn't know this)
+                 - The agent would be unable to interact with existing rules (e.g. how would it know to delete a rule,
+                 if it doesn't know what rules exist)
+            - The Green IERs (optional) - It often needs to know which traffic it should be allowing. For example
+             in the default config one of the green IERs is blocked by default, but it has no way of knowing this
+             based on the observation space. Additionally, potentially in the future, once a node state
+             has been fixed (no longer compromised), it needs a way to know it should reallow traffic.
+             A RL agent can learn what the green IERs are on its own - but the rule based agent cannot easily do this.
+
+        There doesn't seem like there's much that can be done if an Operating or OS State is compromised
+
+        If a service node becomes compromised there's a decision to make - do we block that service?
+        Pros: It cannot launch an attack on another node, so the node will not be able to be OVERWHELMED
+        Cons: Will block a green IER, decreasing the reward
+        We decide to block the service.
+
+        Potentially a better solution (for the reward) would be to block the incoming traffic from compromised
+        nodes once a service becomes overwhelmed. However currently the ACL action space has no way of reversing
+        an overwhelmed state, so we don't do this.
+
+        The method works in three stages: (1) look for a compromised service and an ALLOW rule to delete,
+        (2) failing that, look for a blocked green IER that needs an ALLOW rule creating, (3) convert the chosen
+        action (or a "do nothing" action) into its entry in the environment's action dictionary.
+
+        :param obs: current observation from the gym environment
+        :type obs: np.ndarray
+        :return: Optimal action to take in the environment (chosen from the discrete action space)
+        :rtype: int
+        """
+        # obs = convert_to_old_obs(obs)
+        r_obs = transform_change_obs_readable(obs)
+        # The first three entries of the readable observation are not used here; every remaining entry is
+        # treated as the per-node states of one service.
+        _, _, _, *s = r_obs
+
+        if len(r_obs) == 4:  # only 1 service
+            s = [*s]
+
+        # 1. Check if node is compromised. If so we want to block its outwards services
+        # a. If it is compromised check if there's an allow rule we should delete.
+        #   cons: might delete a multi-rule from any source node (ANY -> x)
+        # b. OPTIONAL (Deny rules not needed): Check if there already exists an existing Deny Rule so not to duplicate
+        # c. OPTIONAL (no allow rule = blocked): Add a DENY rule
+        found_action = False
+        for service_num, service_states in enumerate(s):
+            for x, service_state in enumerate(service_states):
+                if service_state == "COMPROMISED":
+                    action_source_id = x + 1  # +1 as 0 is any
+                    action_destination_id = "ANY"
+                    action_protocol = service_num + 1  # +1 as 0 is any
+                    action_port = "ANY"
+
+                    allow_rules = self.get_allow_acl_rules(
+                        action_source_id,
+                        action_destination_id,
+                        action_protocol,
+                        action_port,
+                        self._env.acl,
+                        self._env.nodes,
+                        self._env.services_list,
+                    )
+                    deny_rules = self.get_deny_acl_rules(
+                        action_source_id,
+                        action_destination_id,
+                        action_protocol,
+                        action_port,
+                        self._env.acl,
+                        self._env.nodes,
+                        self._env.services_list,
+                    )
+                    if len(allow_rules) > 0:
+                        # Check if there's an allow rule we should delete
+                        # Only the first matching ALLOW rule is targeted; the action fields are rebuilt from
+                        # that rule's own source, destination, protocol and port.
+                        rule = list(allow_rules.values())[0]
+                        action_decision = "DELETE"
+                        action_permission = "ALLOW"
+                        action_source_ip = rule.get_source_ip()
+                        action_source_id = int(get_node_of_ip(action_source_ip, self._env.nodes))
+                        action_destination_ip = rule.get_dest_ip()
+                        action_destination_id = int(get_node_of_ip(action_destination_ip, self._env.nodes))
+                        action_protocol_name = rule.get_protocol()
+                        action_protocol = (
+                            self._env.services_list.index(action_protocol_name) + 1
+                        )  # convert name e.g. 'TCP' to index
+                        action_port_name = rule.get_port()
+                        action_port = (
+                            self._env.ports_list.index(action_port_name) + 1
+                        )  # convert port name e.g. '80' to index
+
+                        found_action = True
+                        break
+                    elif len(deny_rules) > 0:
+                        # TODO OPTIONAL
+                        # If there's already a DENY RULE, that blocks EVERYTHING from the source ip we don't need
+                        # to create another
+                        # Check to see if the DENY rule really blocks everything (ANY) or just a specific rule
+                        continue
+                    else:
+                        # TODO OPTIONAL: Add a DENY rule, optional as by default no allow rule == blocked
+                        # NOTE(review): found_action is not set in this branch, so the CREATE/DENY values
+                        # assigned here are never turned into an action, and the break stops the remaining
+                        # nodes of this service from being checked - confirm whether this is intended.
+                        action_decision = "CREATE"
+                        action_permission = "DENY"
+                        break
+            if found_action:
+                break
+
+        # 2. If NO Node is Compromised, or the node has already been blocked, check the green IERs and
+        #  add an Allow rule if the green IER is being blocked.
+        # a.  OPTIONAL - NOT IMPLEMENTED (optional as a deny rule does not overwrite an allow rule):
+        # If there's a DENY rule delete it if:
+        #    - There isn't already a deny rule
+        #    - It doesn't allow a compromised node to become operational.
+        # b. Add an ALLOW rule if:
+        #     - There isn't already an allow rule
+        #     - It doesn't allow a compromised node to become operational
+
+        if not found_action:
+            # Which Green IERS are blocked
+            blocked_green_iers = self.get_blocked_green_iers(self._env.green_iers, self._env.acl, self._env.nodes)
+            for ier_key, ier in blocked_green_iers.items():
+                # Which ALLOW rules are allowing this IER (none)
+                allowing_rules = self.get_allow_acl_rules_for_ier(ier, self._env.acl, self._env.nodes)
+
+                # If there are no blocking rules, it may be being blocked by default
+                # If there is already an allow rule
+                node_id_to_check = int(ier.get_source_node_id())
+                service_name_to_check = ier.get_protocol()
+                service_id_to_check = self._env.services_list.index(service_name_to_check)
+
+                # Service state of the source node in the ier (node ids are 1-based, hence the -1)
+                service_state = s[service_id_to_check][node_id_to_check - 1]
+
+                if len(allowing_rules) == 0 and service_state != "COMPROMISED":
+                    action_decision = "CREATE"
+                    action_permission = "ALLOW"
+                    action_source_id = int(ier.get_source_node_id())
+                    action_destination_id = int(ier.get_dest_node_id())
+                    action_protocol_name = ier.get_protocol()
+                    action_protocol = (
+                        self._env.services_list.index(action_protocol_name) + 1
+                    )  # convert name e.g. 'TCP' to index
+                    action_port_name = ier.get_port()
+                    action_port = (
+                        self._env.ports_list.index(action_port_name) + 1
+                    )  # convert port name e.g. '80' to index
+
+                    found_action = True
+                    break
+
+        # 3. Convert the chosen action (or a nothing action) into its entry in the environment's action dict
+        if found_action:
+            action = [
+                action_decision,
+                action_permission,
+                action_source_id,
+                action_destination_id,
+                action_protocol,
+                action_port,
+            ]
+            action = transform_action_acl_enum(action)
+            action = get_new_action(action, self._env.action_dict)
+        else:
+            # If no good/useful action has been found, just perform a nothing action
+            action = ["NONE", "ALLOW", "ANY", "ANY", "ANY", "ANY"]
+            action = transform_action_acl_enum(action)
+            action = get_new_action(action, self._env.action_dict)
+        return action
+
+    def _calculate_action_basic_view(self, obs: np.ndarray) -> int:
+        """
+        Calculate a good acl-based action for the blue agent to take.
+
+        Only the current observation is used: there is NO knowledge of
+        previous actions taken and NO reward feedback.
+
+        The precise action is picked at random, because the aim is to block
+        all traffic originating from a compromised node without being able
+        to tell:
+            1. Which ACL rules already exist
+            2. Which actions the agent has already tried.
+
+        There is a high probability that the correct rule will not be deleted
+        before the state becomes overwhelmed.
+
+        Currently, a deny rule does not overwrite an allow rule. The allow
+        rules must be deleted.
+
+        :param obs: current observation from the gym environment
+        :type obs: np.ndarray
+        :return: Optimal action to take in the environment (chosen from the discrete action space)
+        :rtype: int
+        """
+        action_dict = self._env.action_dict
+        r_obs = transform_change_obs_readable(obs)
+        _, o_states, _, *s = r_obs
+
+        if len(r_obs) == 4:  # only 1 service
+            s = [*s]
+
+        # number of nodes (not links)
+        number_of_nodes = len([state for state in o_states if state != "NONE"])
+
+        for service_num, service_states in enumerate(s):
+            compromised_indices = [idx for idx, state in enumerate(service_states) if state == "COMPROMISED"]
+            if len(compromised_indices) == 0:
+                # Nothing is COMPROMISED for this service, move on to the next one
+                continue
+
+            # Random draws are made in a fixed order: source node, destination, then port.
+            source_node = np.random.choice(compromised_indices) + 1  # +1 as 0 would be any
+
+            # Randomly select a destination ID to block
+            destination = np.random.choice(list(range(1, number_of_nodes + 1)) + ["ANY"])
+            if destination != "ANY":
+                destination = int(destination)
+
+            # Randomly select a port to block
+            # Bad assumption that number of protocols equals number of ports
+            # AND no rules exist with an ANY port
+            port = np.random.choice(list(range(1, len(s) + 1)))
+
+            # We can only perform 1 action on each step, so return as soon as one has been built
+            delete_action = transform_action_acl_enum(
+                [
+                    "DELETE",
+                    "ALLOW",
+                    source_node,
+                    destination,
+                    service_num + 1,  # +1 as 0 is any
+                    port,
+                ]
+            )
+            return get_new_action(delete_action, action_dict)
+
+        # If no good/useful action has been found, just perform a nothing action
+        nothing_action = transform_action_acl_enum(["NONE", "ALLOW", "ANY", "ANY", "ANY", "ANY"])
+        return get_new_action(nothing_action, action_dict)
--- a/src/primaite/agents/hardcoded_node.py
+++ b/src/primaite/agents/hardcoded_node.py
@@ -0,0 +1,125 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+import numpy as np
+
+from primaite.agents.hardcoded_abc import HardCodedAgentSessionABC
+from primaite.agents.utils import get_new_action, transform_action_node_enum, transform_change_obs_readable
+
+
+class HardCodedNodeAgent(HardCodedAgentSessionABC):
+    """An Agent Session class that implements a deterministic Node agent."""
+
+    def _calculate_action(self, obs: np.ndarray) -> int:
+        """
+        Calculate a good node-based action for the blue agent to take.
+
+        :param obs: current observation from the gym environment
+        :type obs: np.ndarray
+        :return: Optimal action to take in the environment (chosen from the discrete action space)
+        :rtype: int
+        """
+        action_dict = self._env.action_dict
+        r_obs = transform_change_obs_readable(obs)
+        _, o, os, *s = r_obs
+
+        if len(r_obs) == 4:  # only 1 service
+            s = [*s]
+
+        # Check in order of most important states (order doesn't currently
+        # matter, but it probably should)
+        # First see if any OS states are compromised
+        for x, os_state in enumerate(os):
+            if os_state == "COMPROMISED":
+                action_node_id = x + 1
+                action_node_property = "OS"
+                property_action = "PATCHING"
+                action_service_index = 0  # does nothing isn't relevant for os
+                action = [
+                    action_node_id,
+                    action_node_property,
+                    property_action,
+                    action_service_index,
+                ]
+                action = transform_action_node_enum(action)
+                action = get_new_action(action, action_dict)
+                # We can only perform 1 action on each step
+                return action
+
+        # Next, see if any Services are compromised
+        # We fix the compromised state before overwhelmed state,
+        # If a compromised entry node is fixed before the overwhelmed state is triggered, instruction is ignored
+        for service_num, service in enumerate(s):
+            for x, service_state in enumerate(service):
+                if service_state == "COMPROMISED":
+                    action_node_id = x + 1
+                    action_node_property = "SERVICE"
+                    property_action = "PATCHING"
+                    action_service_index = service_num
+
+                    action = [
+                        action_node_id,
+                        action_node_property,
+                        property_action,
+                        action_service_index,
+                    ]
+                    action = transform_action_node_enum(action)
+                    action = get_new_action(action, action_dict)
+                    # We can only perform 1 action on each step
+                    return action
+
+        # Next, See if any services are overwhelmed
+        # perhaps this should be fixed automatically when the compromised PCs issues are also resolved
+        # Currently there's no reason that an Overwhelmed state cannot be resolved before resolving the compromised PCs
+
+        for service_num, service in enumerate(s):
+            for x, service_state in enumerate(service):
+                if service_state == "OVERWHELMED":
+                    action_node_id = x + 1
+                    action_node_property = "SERVICE"
+                    property_action = "PATCHING"
+                    action_service_index = service_num
+
+                    action = [
+                        action_node_id,
+                        action_node_property,
+                        property_action,
+                        action_service_index,
+                    ]
+                    action = transform_action_node_enum(action)
+                    action = get_new_action(action, action_dict)
+                    # We can only perform 1 action on each step
+                    return action
+
+        # Finally, turn on any off nodes
+        for x, operating_state in enumerate(o):
+            if operating_state == "OFF":
+                action_node_id = x + 1
+                action_node_property = "OPERATING"
+                property_action = "ON"  # Why reset it when we can just turn it on
+                action_service_index = 0  # does nothing isn't relevant for operating state
+                action = [
+                    action_node_id,
+                    action_node_property,
+                    property_action,
+                    action_service_index,
+                ]
+                # transform_action_node_enum takes only the action list, exactly as in the branches above
+                action = transform_action_node_enum(action)
+                action = get_new_action(action, action_dict)
+                # We can only perform 1 action on each step
+                return action
+
+        # If no good actions, just go with an action that wont do any harm
+        action_node_id = 1
+        action_node_property = "NONE"
+        property_action = "ON"
+        action_service_index = 0
+        action = [
+            action_node_id,
+            action_node_property,
+            property_action,
+            action_service_index,
+        ]
+        action = transform_action_node_enum(action)
+        action = get_new_action(action, action_dict)
+
+        return action
--- a/src/primaite/agents/rllib.py
+++ b/src/primaite/agents/rllib.py
@@ -0,0 +1,286 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+from __future__ import annotations
+
+import json
+import shutil
+import zipfile
+from datetime import datetime
+from logging import Logger
+from pathlib import Path
+from typing import Any, Callable, Dict, Optional, Union
+from uuid import uuid4
+
+from ray.rllib.algorithms import Algorithm
+from ray.rllib.algorithms.a2c import A2CConfig
+from ray.rllib.algorithms.ppo import PPOConfig
+from ray.tune.logger import UnifiedLogger
+from ray.tune.registry import register_env
+
+from primaite import getLogger
+from primaite.agents.agent_abc import AgentSessionABC
+from primaite.common.enums import AgentFramework, AgentIdentifier, SessionType
+from primaite.environment.primaite_env import Primaite
+from primaite.exceptions import RLlibAgentError
+
+_LOGGER: Logger = getLogger(__name__)
+
+
+# TODO: verify type of env_config
+def _env_creator(env_config: Dict[str, Any]) -> Primaite:
+    return Primaite(
+        training_config_path=env_config["training_config_path"],
+        lay_down_config_path=env_config["lay_down_config_path"],
+        session_path=env_config["session_path"],
+        timestamp_str=env_config["timestamp_str"],
+    )
+
+
+# TODO: verify type hint return type
+def _custom_log_creator(session_path: Path) -> Callable[[Dict], UnifiedLogger]:
+    logdir = session_path / "ray_results"
+    logdir.mkdir(parents=True, exist_ok=True)
+
+    def logger_creator(config: Dict) -> UnifiedLogger:
+        return UnifiedLogger(config, logdir, loggers=None)
+
+    return logger_creator
+
+
+class RLlibAgent(AgentSessionABC):
+    """An AgentSession class that implements a Ray RLlib agent."""
+
+    def __init__(
+        self,
+        training_config_path: Optional[Union[str, Path]] = "",
+        lay_down_config_path: Optional[Union[str, Path]] = "",
+        session_path: Optional[Union[str, Path]] = None,
+    ) -> None:
+        """
+        Initialise the RLLib Agent training session.
+
+        :param training_config_path: YAML file containing configurable items defined in
+            `primaite.config.training_config.TrainingConfig`
+        :type training_config_path: Union[path, str]
+        :param lay_down_config_path: YAML file containing configurable items for generating network laydown.
+        :type lay_down_config_path: Union[path, str]
+        :param session_path: Must be None; passing a path raises NotImplementedError (loading is not implemented).
+        :raises ValueError: If the training config agent_framework is not RLLIB, or agent_identifier is not PPO or A2C.
+        :raises RLlibAgentError: If the training config session_type is EVAL, as no trained agent exists to evaluate.
+        """
+        # TODO: implement RLlib agent loading
+        if session_path is not None:
+            msg = "RLlib agent loading has not been implemented yet"
+            _LOGGER.critical(msg)
+            raise NotImplementedError(msg)
+
+        super().__init__(training_config_path, lay_down_config_path)
+        if self._training_config.session_type == SessionType.EVAL:
+            msg = "Cannot evaluate an RLlib agent that hasn't been through training yet."
+            _LOGGER.critical(msg)
+            raise RLlibAgentError(msg)
+        if not self._training_config.agent_framework == AgentFramework.RLLIB:
+            msg = f"Expected RLLIB agent_framework, " f"got {self._training_config.agent_framework}"
+            _LOGGER.error(msg)
+            raise ValueError(msg)
+        self._agent_config_class: Union[PPOConfig, A2CConfig]
+        if self._training_config.agent_identifier == AgentIdentifier.PPO:
+            self._agent_config_class = PPOConfig
+        elif self._training_config.agent_identifier == AgentIdentifier.A2C:
+            self._agent_config_class = A2CConfig
+        else:
+            msg = "Expected PPO or A2C agent_identifier, " f"got {self._training_config.agent_identifier.value}"
+            _LOGGER.error(msg)
+            raise ValueError(msg)
+        self._agent_config: Union[PPOConfig, A2CConfig]
+
+        self._current_result: dict
+        self._setup()
+        _LOGGER.debug(
+            f"Created {self.__class__.__name__} using: "
+            f"agent_framework={self._training_config.agent_framework}, "
+            f"agent_identifier="
+            f"{self._training_config.agent_identifier}, "
+            f"deep_learning_framework="
+            f"{self._training_config.deep_learning_framework}"
+        )
+        self._train_agent = None  # Required to capture the learning agent to close after eval
+
+    def _update_session_metadata_file(self) -> None:
+        """
+        Update the ``session_metadata.json`` file.
+
+        Updates the ``session_metadata.json`` in the ``session_path`` directory
+        with the following key/value pairs:
+
+        - end_datetime: The date & time the session ended in iso format.
+        - total_episodes: The total number of episodes completed (stored under "learning" or "evaluation").
+        - total_time_steps: The total number of time steps completed (stored under "learning" or "evaluation").
+        """
+        with open(self.session_path / "session_metadata.json", "r") as file:
+            metadata_dict = json.load(file)
+
+        metadata_dict["end_datetime"] = datetime.now().isoformat()
+        if not self.is_eval:
+            metadata_dict["learning"]["total_episodes"] = self._current_result["episodes_total"]  # noqa
+            metadata_dict["learning"]["total_time_steps"] = self._current_result["timesteps_total"]  # noqa
+        else:
+            metadata_dict["evaluation"]["total_episodes"] = self._current_result["episodes_total"]  # noqa
+            metadata_dict["evaluation"]["total_time_steps"] = self._current_result["timesteps_total"]  # noqa
+
+        filepath = self.session_path / "session_metadata.json"
+        _LOGGER.debug(f"Updating Session Metadata file: {filepath}")
+        with open(filepath, "w") as file:
+            json.dump(metadata_dict, file)
+            _LOGGER.debug("Finished updating session metadata file")
+
+    def _setup(self) -> None:
+        super()._setup()
+        register_env("primaite", _env_creator)
+        self._agent_config = self._agent_config_class()
+
+        self._agent_config.environment(
+            env="primaite",
+            env_config=dict(
+                training_config_path=self._training_config_path,
+                lay_down_config_path=self._lay_down_config_path,
+                session_path=self.session_path,
+                timestamp_str=self.timestamp_str,
+            ),
+        )
+        self._agent_config.seed = self._training_config.seed
+
+        self._agent_config.training(train_batch_size=self._training_config.num_train_steps)
+        self._agent_config.framework(framework="tf")
+
+        self._agent_config.rollouts(
+            num_rollout_workers=1,
+            num_envs_per_worker=1,
+            horizon=self._training_config.num_train_steps,
+        )
+        self._agent: Algorithm = self._agent_config.build(logger_creator=_custom_log_creator(self.learning_path))
+
+    def _save_checkpoint(self) -> None:
+        checkpoint_n = self._training_config.checkpoint_every_n_episodes
+        episode_count = self._current_result["episodes_total"]
+        save_checkpoint = False
+        if checkpoint_n:
+            save_checkpoint = episode_count % checkpoint_n == 0
+        if episode_count and save_checkpoint:
+            self._agent.save(str(self.checkpoints_path))
+
+    def learn(
+        self,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Train the agent.
+
+        :param kwargs: Any agent-specific key-word args to be passed.
+        """
+        time_steps = self._training_config.num_train_steps
+        episodes = self._training_config.num_train_episodes
+
+        _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...")
+        for i in range(episodes):
+            self._current_result = self._agent.train()
+            self._save_checkpoint()
+        self.save()
+        super().learn()
+        # Done this way as the RLlib eval can only be performed if the session hasn't been stopped
+        if self._training_config.session_type is not SessionType.TRAIN:
+            self._train_agent = self._agent
+        else:
+            self._agent.stop()
+            self._plot_av_reward_per_episode(learning_session=True)
+
+    def _unpack_saved_agent_into_eval(self) -> Path:
+        """Unpacks the pre-trained and saved RLlib agent so that it can be reloaded by Ray for eval."""
+        agent_restore_path = self.evaluation_path / "agent_restore"
+        if agent_restore_path.exists():
+            shutil.rmtree(agent_restore_path)
+        agent_restore_path.mkdir()
+        with zipfile.ZipFile(self._saved_agent_path, "r") as zip_file:
+            zip_file.extractall(agent_restore_path)
+        return agent_restore_path
+
+    def _setup_eval(self):
+        self._can_learn = False
+        self._can_evaluate = True
+        self._agent.restore(str(self._unpack_saved_agent_into_eval()))
+
+    def evaluate(
+        self,
+        **kwargs,
+    ):
+        """
+        Evaluate the agent.
+
+        :param kwargs: Any agent-specific key-word args to be passed.
+        """
+        time_steps = self._training_config.num_eval_steps
+        episodes = self._training_config.num_eval_episodes
+
+        self._setup_eval()
+
+        self._env: Primaite = Primaite(
+            self._training_config_path, self._lay_down_config_path, self.session_path, self.timestamp_str
+        )
+
+        self._env.set_as_eval()
+        self.is_eval = True
+        if self._training_config.deterministic:
+            deterministic_str = "deterministic"
+        else:
+            deterministic_str = "non-deterministic"
+        _LOGGER.info(
+            f"Beginning {deterministic_str} evaluation for " f"{episodes} episodes @ {time_steps} time steps..."
+        )
+        for episode in range(episodes):
+            obs = self._env.reset()
+            for step in range(time_steps):
+                action = self._agent.compute_single_action(observation=obs, explore=False)
+
+                obs, rewards, done, info = self._env.step(action)
+
+        self._env.reset()
+        self._env.close()
+        super().evaluate()
+        # Now we're safe to close the learning agent and write the mean rewards per episode for it
+        if self._training_config.session_type is not SessionType.TRAIN:
+            self._train_agent.stop()
+            self._plot_av_reward_per_episode(learning_session=True)
+        # Perform a clean-up of the unpacked agent
+        if (self.evaluation_path / "agent_restore").exists():
+            shutil.rmtree((self.evaluation_path / "agent_restore"))
+
+    def _get_latest_checkpoint(self) -> None:
+        raise NotImplementedError
+
+    @classmethod
+    def load(cls, path: Union[str, Path]) -> RLlibAgent:
+        """Load an agent from file."""
+        raise NotImplementedError
+
+    def save(self, overwrite_existing: bool = True) -> None:
+        """
+        Save the agent as a zip archive (target name derived from ``self._saved_agent_path``).
+
+        :param overwrite_existing: Currently unused by this method; kept for interface compatibility.
+        """
+        # Save into an isolated, uniquely named temp dir so the new checkpoint is easy to locate
+        temp_dir = self.learning_path / str(uuid4())
+        temp_dir.mkdir()
+        try:
+            self._agent.save(str(temp_dir))
+            # The saved RLlib checkpoint is taken to be the first entry inside the temp directory
+            checkpoint_dir = next(temp_dir.iterdir(), None)
+            if checkpoint_dir is None:
+                raise RLlibAgentError(f"RLlib did not write a checkpoint into {temp_dir}")
+            shutil.make_archive(str(self._saved_agent_path).replace(".zip", ""), "zip", checkpoint_dir)  # noqa
+        finally:
+            # Always drop the temp directory, even if saving or zipping failed
+            shutil.rmtree(temp_dir)
+
+    def export(self) -> None:
+        """Export the agent to transportable file format."""
+        raise NotImplementedError
--- a/src/primaite/agents/sb3.py
+++ b/src/primaite/agents/sb3.py
@@ -0,0 +1,196 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+from __future__ import annotations
+
+import json
+from logging import Logger
+from pathlib import Path
+from typing import Any, Optional, Union
+
+import numpy as np
+from stable_baselines3 import A2C, PPO
+from stable_baselines3.ppo import MlpPolicy as PPOMlp
+
+from primaite import getLogger
+from primaite.agents.agent_abc import AgentSessionABC
+from primaite.common.enums import AgentFramework, AgentIdentifier
+from primaite.environment.primaite_env import Primaite
+
+_LOGGER: Logger = getLogger(__name__)
+
+
+class SB3Agent(AgentSessionABC):
+    """An AgentSession class that implements a Stable Baselines3 agent."""
+
+    def __init__(
+        self,
+        training_config_path: Optional[Union[str, Path]] = None,
+        lay_down_config_path: Optional[Union[str, Path]] = None,
+        session_path: Optional[Union[str, Path]] = None,
+    ) -> None:
+        """
+        Initialise the SB3 Agent training session.
+
+        :param training_config_path: YAML file containing configurable items defined in
+            `primaite.config.training_config.TrainingConfig`
+        :type training_config_path: Union[path, str]
+        :param lay_down_config_path: YAML file containing configurable items for generating network laydown.
+        :type lay_down_config_path: Union[path, str]
+        :param session_path: Optional session path, forwarded unchanged to the parent class initialiser.
+        :type session_path: Union[path, str]
+        :raises ValueError: If the training config agent_framework is not SB3, or agent_identifier is not PPO or A2C.
+        """
+        super().__init__(training_config_path, lay_down_config_path, session_path)
+        if not self._training_config.agent_framework == AgentFramework.SB3:
+            msg = f"Expected SB3 agent_framework, " f"got {self._training_config.agent_framework}"
+            _LOGGER.error(msg)
+            raise ValueError(msg)
+        self._agent_class: Union[PPO, A2C]
+        if self._training_config.agent_identifier == AgentIdentifier.PPO:
+            self._agent_class = PPO
+        elif self._training_config.agent_identifier == AgentIdentifier.A2C:
+            self._agent_class = A2C
+        else:
+            msg = "Expected PPO or A2C agent_identifier, " f"got {self._training_config.agent_identifier}"
+            _LOGGER.error(msg)
+            raise ValueError(msg)
+
+        self._tensorboard_log_path = self.learning_path / "tensorboard_logs"
+        self._tensorboard_log_path.mkdir(parents=True, exist_ok=True)
+
+        _LOGGER.debug(
+            f"Created {self.__class__.__name__} using: "
+            f"agent_framework={self._training_config.agent_framework}, "
+            f"agent_identifier="
+            f"{self._training_config.agent_identifier}"
+        )
+
+        self.is_eval = False
+
+        self._setup()
+
+    def _setup(self) -> None:
+        """Set up the SB3 Agent."""
+        self._env = Primaite(
+            training_config_path=self._training_config_path,
+            lay_down_config_path=self._lay_down_config_path,
+            session_path=self.session_path,
+            timestamp_str=self.timestamp_str,
+        )
+
+        # check if there is a zip file that needs to be loaded
+        load_file = next(self.session_path.rglob("*.zip"), None)
+
+        if not load_file:
+            # create a new env and agent
+
+            self._agent = self._agent_class(
+                PPOMlp,
+                self._env,
+                verbose=self.sb3_output_verbose_level,
+                n_steps=self._training_config.num_train_steps,
+                tensorboard_log=str(self._tensorboard_log_path),
+                seed=self._training_config.seed,
+            )
+        else:
+            # set env values from session metadata
+            with open(self.session_path / "session_metadata.json", "r") as file:
+                md_dict = json.load(file)
+
+            # load environment values
+            if self.is_eval:
+                # evaluation always starts at 0
+                self._env.episode_count = 0
+                self._env.total_step_count = 0
+            else:
+                # carry on from previous learning sessions
+                self._env.episode_count = md_dict["learning"]["total_episodes"]
+                self._env.total_step_count = md_dict["learning"]["total_time_steps"]
+
+            # load the file
+            self._agent = self._agent_class.load(load_file, env=self._env)
+
+            # set agent values
+            self._agent.verbose = self.sb3_output_verbose_level
+            self._agent.tensorboard_log = self.session_path / "learning/tensorboard_logs"
+
+        super()._setup()
+
+    def _save_checkpoint(self) -> None:
+        checkpoint_n = self._training_config.checkpoint_every_n_episodes
+        episode_count = self._env.episode_count
+        save_checkpoint = False
+        if checkpoint_n:
+            save_checkpoint = episode_count % checkpoint_n == 0
+        if episode_count and save_checkpoint:
+            checkpoint_path = self.checkpoints_path / f"sb3ppo_{episode_count}.zip"
+            self._agent.save(checkpoint_path)
+            _LOGGER.debug(f"Saved agent checkpoint: {checkpoint_path}")
+
+    def _get_latest_checkpoint(self) -> None:
+        pass
+
+    def learn(
+        self,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Train the agent.
+
+        :param kwargs: Any agent-specific key-word args to be passed.
+        """
+        time_steps = self._training_config.num_train_steps
+        episodes = self._training_config.num_train_episodes
+        self.is_eval = False
+        _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...")
+        for i in range(episodes):
+            self._agent.learn(total_timesteps=time_steps)
+            self._save_checkpoint()
+        self._env._write_av_reward_per_episode()  # noqa
+        self.save()
+        self._env.close()
+        super().learn()
+
+        # save agent
+        self.save()
+
+        self._plot_av_reward_per_episode(learning_session=True)
+
+    def evaluate(
+        self,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Evaluate the agent.
+
+        :param kwargs: Any agent-specific key-word args to be passed.
+        """
+        time_steps = self._training_config.num_eval_steps
+        episodes = self._training_config.num_eval_episodes
+        self._env.set_as_eval()
+        self.is_eval = True
+        if self._training_config.deterministic:
+            deterministic_str = "deterministic"
+        else:
+            deterministic_str = "non-deterministic"
+        _LOGGER.info(
+            f"Beginning {deterministic_str} evaluation for " f"{episodes} episodes @ {time_steps} time steps..."
+        )
+        for episode in range(episodes):
+            obs = self._env.reset()
+
+            for step in range(time_steps):
+                action, _states = self._agent.predict(obs, deterministic=self._training_config.deterministic)
+                if isinstance(action, np.ndarray):
+                    action = np.int64(action)
+                obs, rewards, done, info = self._env.step(action)
+        self._env._write_av_reward_per_episode()  # noqa
+        self._env.close()
+        super().evaluate()
+
+    def save(self) -> None:
+        """Save the agent."""
+        self._agent.save(self._saved_agent_path)
+
+    def export(self) -> None:
+        """Export the agent to transportable file format."""
+        raise NotImplementedError
--- a/src/primaite/agents/simple.py
+++ b/src/primaite/agents/simple.py
@@ -0,0 +1,59 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+import numpy as np
+
+from primaite.agents.hardcoded_abc import HardCodedAgentSessionABC
+from primaite.agents.utils import get_new_action, transform_action_acl_enum, transform_action_node_enum
+
+
+class RandomAgent(HardCodedAgentSessionABC):
+    """
+    A Random Agent.
+
+    Get a completely random action from the action space.
+    """
+
+    def _calculate_action(self, obs: np.ndarray) -> int:
+        return self._env.action_space.sample()
+
+
+class DummyAgent(HardCodedAgentSessionABC):
+    """
+    A Dummy Agent.
+
+    All action spaces setup so dummy action is always 0 regardless of action type used.
+    """
+
+    def _calculate_action(self, obs: np.ndarray) -> int:
+        return 0
+
+
+class DoNothingACLAgent(HardCodedAgentSessionABC):
+    """
+    A do nothing ACL agent.
+
+    A valid ACL action that has no effect; does nothing.
+    """
+
+    def _calculate_action(self, obs: np.ndarray) -> int:
+        nothing_action = ["NONE", "ALLOW", "ANY", "ANY", "ANY", "ANY"]
+        nothing_action = transform_action_acl_enum(nothing_action)
+        nothing_action = get_new_action(nothing_action, self._env.action_dict)
+
+        return nothing_action
+
+
+class DoNothingNodeAgent(HardCodedAgentSessionABC):
+    """
+    A do nothing Node agent.
+
+    A valid Node action that has no effect; does nothing.
+    """
+
+    def _calculate_action(self, obs: np.ndarray) -> int:
+        nothing_action = [1, "NONE", "ON", 0]
+        nothing_action = transform_action_node_enum(nothing_action)
+        nothing_action = get_new_action(nothing_action, self._env.action_dict)
+        # nothing_action should currently always be 0
+
+        return nothing_action
--- a/src/primaite/agents/utils.py
+++ b/src/primaite/agents/utils.py
@@ -0,0 +1,450 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+from typing import Dict, List, Union
+
+import numpy as np
+
+from primaite.common.custom_typing import NodeUnion
+from primaite.common.enums import (
+    HardwareState,
+    LinkStatus,
+    NodeHardwareAction,
+    NodePOLType,
+    NodeSoftwareAction,
+    SoftwareState,
+)
+
+
+def transform_action_node_readable(action: List[int]) -> List[Union[int, str]]:
+    """Convert a node action from enumerated format to readable format.
+
+    example:
+    [1, 3, 1, 0] -> [1, 'SERVICE', 'PATCHING', 0]
+
+    :param action: Agent action, formatted as a list of ints, for more information check out
+        `primaite.environment.primaite_env.Primaite`
+    :type action: List[int]
+    :return: The same action list, but with the encodings translated back into meaningful labels
+    :rtype: List[Union[int,str]]
+    """
+    action_node_property = NodePOLType(action[1]).name
+
+    if action_node_property == "OPERATING":
+        property_action = NodeHardwareAction(action[2]).name
+    elif (action_node_property == "OS" or action_node_property == "SERVICE") and action[2] <= 1:
+        property_action = NodeSoftwareAction(action[2]).name
+    else:
+        property_action = "NONE"
+
+    new_action: list[Union[int, str]] = [action[0], action_node_property, property_action, action[3]]
+    return new_action
+
+
+def transform_action_acl_readable(action: List[int]) -> List[Union[str, int]]:
+    """
+    Translate an integer-encoded ACL action into its readable form.
+
+    example:
+    [0, 1, 2, 5, 0, 1] -> ['NONE', 'ALLOW', 2, 5, 'ANY', 1]
+
+    :param action: Agent action, formatted as a list of ints, for more information check out
+        `primaite.environment.primaite_env.Primaite`
+    :type action: List[int]
+    :return: A new list in which the decision, the permission and every 0 in fields 2-5 are replaced by labels
+    :rtype: List[Union[int,str]]
+    """
+    decision_labels = {0: "NONE", 1: "CREATE", 2: "DELETE"}
+    permission_labels = {0: "DENY", 1: "ALLOW"}
+
+    # Translate the two leading codes; an unknown code raises KeyError
+    decision = decision_labels[action[0]]
+    permission = permission_labels[action[1]]
+
+    # For IPs, Ports and Protocols (fields 2-5), 0 means any, every other value is kept as a plain index
+    readable = [decision, permission]
+    for field in action[2:6]:
+        readable.append("ANY" if field == 0 else field)
+
+    return readable
+
+
+def is_valid_node_action(action: List[int]) -> bool:
+    """
+    Is the node action an actual valid action.
+
+    Only uses information about the action to determine if the action has an effect
+
+    Does NOT consider:
+    - Node ID not valid to perform an operation - e.g. selected node has no service so cannot patch
+    - Node already being in that state (turning an ON node ON)
+
+    :param action: Agent action, formatted as a list of ints, for more information check out
+        `primaite.environment.primaite_env.Primaite`
+    :type action: List[int]
+    :return: Whether the action is valid
+    :rtype: bool
+    """
+    action_r = transform_action_node_readable(action)
+
+    node_property = action_r[1]
+    node_action = action_r[2]
+
+    # print("node property", node_property, "\nnode action", node_action)
+
+    if node_property == "NONE":
+        return False
+    if node_action == "NONE":
+        return False
+    if node_property == "OPERATING" and node_action == "PATCHING":
+        # Operating State cannot PATCH
+        return False
+    if node_property != "OPERATING" and node_action not in [
+        "NONE",
+        "PATCHING",
+    ]:
+        # Software States can only do Nothing or Patch
+        return False
+    return True
+
+
+def is_valid_acl_action(action: List[int]) -> bool:
+    """
+    Check whether an ACL action can have any effect.
+
+    The decision is based on the fields of the action itself and nothing else.
+
+    Does NOT consider:
+        - Trying to create identical rules
+        - Trying to create a rule which is a subset of another rule (caused by "ANY")
+
+    :param action: Agent action, formatted as a list of ints, for more information check out
+        `primaite.environment.primaite_env.Primaite`
+    :type action: List[int]
+    :return: False for a "NONE" decision, a rule towards itself or a DENY rule; True otherwise
+    :rtype: bool
+    """
+    readable = transform_action_acl_readable(action)
+    decision, permission = readable[0], readable[1]
+    source, destination = readable[2], readable[3]
+
+    # A "NONE" decision changes nothing.
+    no_op = decision == "NONE"
+
+    # ACL rule towards itself: the same concrete endpoint is used as source and destination.
+    # (When both are equal and the source is not "ANY", the destination cannot be "ANY" either.)
+    self_rule = source == destination and source != "ANY"
+
+    # DENY is unnecessary, we can create and delete allow rules instead.
+    # No allow rule = blocked/DENY by default. ALLOW overrides existing DENY.
+    deny_rule = permission == "DENY"
+
+    # The action is valid only when none of the rejection reasons applies.
+    is_valid = not (no_op or self_rule or deny_rule)
+    return is_valid
+
+
+def is_valid_acl_action_extra(action: List[int]) -> bool:
+    """
+    Harsher version of valid acl actions, does not allow action.
+
+    :param action: Agent action, formatted as a list of ints, for more information check out
+        `primaite.environment.primaite_env.Primaite`
+    :type action: List[int]
+    :return: Whether the action is valid
+    :rtype: bool
+    """
+    if is_valid_acl_action(action) is False:
+        return False
+
+    action_r = transform_action_acl_readable(action)
+    action_protocol = action_r[4]
+    action_port = action_r[5]
+
+    # Don't allow protocols or ports to be ANY
+    # in the future we might want to do the opposite, and only have ANY option for ports and service
+    if action_protocol == "ANY":
+        return False
+    if action_port == "ANY":
+        return False
+
+    return True
+
+
+def transform_change_obs_readable(obs: np.ndarray) -> List[List[Union[str, int]]]:
+    """Turn an observation table into one readable list per column.
+
+    example (NOTE(review): the loop starts at column 4, so column 3 produces no output list - confirm intended):
+    np.array([[1,2,1,3],[2,1,1,1]]) -> [[1, 2], ['OFF', 'ON'], ['GOOD', 'GOOD']]
+
+    :param obs: Raw observation from the environment.
+    :type obs: np.ndarray
+    :return: Column 0 as-is, columns 1 and 2 as state names, then one list for each column from index 4 onwards.
+    :rtype: List[List[Union[str, int]]]
+    """
+    ids = [i for i in obs[:, 0]]
+    operating_states = [HardwareState(i).name for i in obs[:, 1]]
+    os_states = [SoftwareState(i).name for i in obs[:, 2]]
+    new_obs = [ids, operating_states, os_states]
+
+    for service in range(4, obs.shape[1]):
+        # Values above 4 (e.g. link bit/s) have no SoftwareState name and are kept as raw numbers
+        service_states = [SoftwareState(i).name if i <= 4 else i for i in obs[:, service]]
+        new_obs.append(service_states)
+
+    return new_obs
+
+
+def transform_obs_readable(obs: np.ndarray) -> List[List[Union[str, int]]]:
+    """Transform an observation into one readable list per row.
+
+    example (NOTE(review): column 3 is dropped by `transform_change_obs_readable` - confirm intended):
+    np.array([[1,2,1,3],[2,1,1,1]]) -> [[1, 'OFF', 'GOOD'], [2, 'ON', 'GOOD']]
+
+    :param obs: Raw observation from the environment.
+    :type obs: np.ndarray
+    :return: The same observation, but the encoded integer values are replaced with readable names.
+    :rtype: List[List[Union[str, int]]]
+    """
+    changed_obs = transform_change_obs_readable(obs)
+    new_obs = list(zip(*changed_obs))  # transpose: per-column lists -> per-row tuples
+    # Convert list of tuples to list of lists
+    new_obs = [list(i) for i in new_obs]
+
+    return new_obs
+
+
+def convert_to_new_obs(obs: np.ndarray, num_nodes: int = 10) -> np.ndarray:
+    """Convert original gym Box observation space to new multiDiscrete observation space.
+
+    :param obs: observation in the 'old' (NodeLinkTable) format
+    :type obs: np.ndarray
+    :param num_nodes: number of nodes in the network, defaults to 10
+    :type num_nodes: int, optional
+    :return: reformatted observation
+    :rtype: np.ndarray
+    """
+    # Remove ID columns, remove links and flatten to MultiDiscrete observation space
+    new_obs = obs[:num_nodes, 1:].flatten()
+    return new_obs
+
+
+def convert_to_old_obs(obs: np.ndarray, num_nodes: int = 10, num_links: int = 10, num_services: int = 1) -> np.ndarray:
+    """Convert to old observation.
+
+    The last `num_links` entries of `obs` are link values; they are written to the final column of the link rows.
+
+    example:
+    obs = array([1, 1, 1, 1, 1, 1, 1, 1, 1,  ..., 1, 1, 1])
+
+    new_obs = array([[ 1,  1,  1,  1],
+                     [ 2,  1,  1,  1],
+                     [ 3,  1,  1,  1],
+                     ...
+                    [20,  0,  0,  0]])
+
+    :param obs: observation in the 'new' (MultiDiscrete) format
+    :type obs: np.ndarray
+    :param num_nodes: number of nodes in the network, defaults to 10
+    :type num_nodes: int, optional
+    :param num_links: number of links in the network, defaults to 10; 0 is handled (no link rows are added)
+    :type num_links: int, optional
+    :param num_services: number of services on the network, defaults to 1
+    :type num_services: int, optional
+    :return: 2-d BOX observation space, in the same format as NodeLinkTable
+    :rtype: np.ndarray
+    """
+    # Split on an absolute index: negative slices (obs[:-0] / obs[-0:]) misbehave when num_links == 0
+    split = len(obs) - num_links
+    reshaped_nodes = obs[:split].reshape(num_nodes, num_services + 2)
+
+    # Allocate the table: one row per node and per link, plus a leading ID column
+    new_obs = np.zeros(
+        [reshaped_nodes.shape[0] + num_links, reshaped_nodes.shape[1] + 1],
+        dtype=np.int64,
+    )
+    new_obs[:, 0] = range(1, num_nodes + num_links + 1)  # Adding ID back
+    new_obs[:num_nodes, 1:] = reshaped_nodes  # put values back in
+
+    # Add links back in
+    links = obs[split:]
+    # Links will be added to the last protocol/service slot but they are not specific to that service
+    new_obs[num_nodes:, -1] = links
+
+    return new_obs
+
+
+def describe_obs_change(
+    obs1: np.ndarray, obs2: np.ndarray, num_nodes: int = 10, num_links: int = 10, num_services: int = 1
+) -> str:
+    """Build a string describing the difference between two observations.
+
+    example:
+    obs_1 = array([[1, 1, 1, 1, 3], [2, 1, 1, 1, 1]])
+    obs_2 = array([[1, 1, 1, 1, 1], [2, 1, 1, 1, 1]])
+    output = 'ID 1: SERVICE 2 set to GOOD'
+
+    :param obs1: First observation
+    :type obs1: np.ndarray
+    :param obs2: Second observation
+    :type obs2: np.ndarray
+    :param num_nodes: How many nodes are in the network laydown, defaults to 10
+    :type num_nodes: int, optional
+    :param num_links: How many links are in the network laydown, defaults to 10
+    :type num_links: int, optional
+    :param num_services: How many services are configured for this scenario, defaults to 1
+    :type num_services: int, optional
+    :return: A multi-line string with a human-readable description of the difference.
+    :rtype: str
+    """
+    obs1 = convert_to_old_obs(obs1, num_nodes, num_links, num_services)
+    obs2 = convert_to_old_obs(obs2, num_nodes, num_links, num_services)
+    list_of_changes = []
+    for n, row in enumerate(obs1 - obs2):
+        if row.any() != 0:
+            relevant_changes = np.where(row != 0, obs2[n], -1)
+            relevant_changes[0] = obs2[n, 0]  # ID is always relevant
+            is_link = relevant_changes[0] > num_nodes
+            desc = _describe_obs_change_helper(relevant_changes, is_link)
+            list_of_changes.append(desc)
+
+    change_string = "\n ".join(list_of_changes)
+    if len(list_of_changes) > 0:
+        change_string = "\n " + change_string
+    return change_string
+
+
+def _describe_obs_change_helper(obs_change: List[int], is_link: bool) -> str:
+    """
+    Helper function to describe what has changed.
+
+    example:
+    [ 1 -1 -1 -1  1] -> "ID 1: SERVICE 1 changed to GOOD."
+
+    Handles multiple changes e.g. 'ID 1: SERVICE 1 changed to PATCHING. SERVICE 2 changed to GOOD.'
+
+    :param obs_change: List of integers generated within the `describe_obs_change` function. It should correspond to one
+        row of the observation table, and have `-1` at locations where the observation hasn't changed, and the new
+        status where it has changed.
+    :type obs_change: List[int]
+    :param is_link: Whether the row of the observation space corresponds to a link. False means it represents a node.
+    :type is_link: bool
+    :return: A human-readable description of the difference between the two observation rows.
+    :rtype: str
+    """
+    # Indexes where a change has occurred, not including 0th index
+    index_changed = [i for i in range(1, len(obs_change)) if obs_change[i] != -1]
+    # Node pol type names; indexes >= 3 are labelled "SERVICE <index - 3>"
+    NodePOLTypes = [NodePOLType(i).name if i < 3 else NodePOLType(3).name + " " + str(i - 3) for i in index_changed]
+    # Account for hardware states, software states and links
+    states = [
+        LinkStatus(obs_change[i]).name
+        if is_link
+        else HardwareState(obs_change[i]).name
+        if i == 1
+        else SoftwareState(obs_change[i]).name
+        for i in index_changed
+    ]
+
+    if not is_link:
+        desc = f"ID {obs_change[0]}:"
+        for node_pol_type, state in list(zip(NodePOLTypes, states)):
+            desc = desc + " " + node_pol_type + " changed to " + state + "."
+    else:
+        desc = f"ID {obs_change[0]}: Link traffic changed to {states[0]}."  # only the first changed column is used
+
+    return desc
+
+
+def transform_action_node_enum(action: List[Union[str, int]]) -> List[int]:
+    """Convert a node action from readable string format, to enumerated format.
+
+    example:
+    [1, 'SERVICE', 'PATCHING', 0] -> [1, 3, 1, 0]
+    :param action: Action in 'readable' format
+    :type action: List[Union[str,int]]
+    :return: Action with verbs encoded as ints
+    :rtype: List[int]
+    """
+    action_node_id = action[0]
+    action_node_property = NodePOLType[action[1]].value
+
+    if action[1] == "OPERATING":
+        property_action = NodeHardwareAction[action[2]].value
+    elif action[1] == "OS" or action[1] == "SERVICE":
+        property_action = NodeSoftwareAction[action[2]].value
+    else:
+        property_action = 0
+
+    action_service_index = action[3]
+
+    new_action = [
+        action_node_id,
+        action_node_property,
+        property_action,
+        action_service_index,
+    ]
+
+    return new_action
+
+
+def transform_action_acl_enum(action: List[Union[int, str]]) -> np.ndarray:
+    """
+    Encode a readable ACL action as an array of integers.
+
+    :param action: ACL-based action expressed as a list of human-readable ints and strings
+    :type action: List[Union[int,str]]
+    :return: Array holding the decision code, the permission code and fields 2-5 with every "ANY" replaced by 0.
+    :rtype: np.ndarray
+    """
+    decision_codes = {"NONE": 0, "CREATE": 1, "DELETE": 2}
+    permission_codes = {"DENY": 0, "ALLOW": 1}
+
+    # Encode the two leading labels; an unknown label raises KeyError
+    encoded = [decision_codes[action[0]], permission_codes[action[1]]]
+
+    # For IPs, Ports and Protocols (fields 2-5), ANY has value 0, every other value is kept as it is
+    for field in action[2:6]:
+        if field == "ANY":
+            encoded.append(0)
+        else:
+            encoded.append(field)
+
+    return np.array(encoded)
+
+
+def get_node_of_ip(ip: str, node_dict: Dict[str, NodeUnion]) -> str:
+    """Get the node ID of an IP address.
+
+    node_dict: dictionary of nodes where key is ID, and value is the node (can be obtained from env.nodes)
+
+    :param ip: The IP address of the node whose ID is required
+    :type ip: str
+    :param node_dict: The environment's node registry dictionary
+    :type node_dict: Dict[str,NodeUnion]
+    :return: The key of the first node in `node_dict` whose `ip_address` equals `ip`
+    :rtype: str (the function falls through and implicitly returns None when no node matches)
+    """
+    for node_key, node_value in node_dict.items():
+        node_ip = node_value.ip_address
+        if node_ip == ip:
+            return node_key
+
+
+def get_new_action(old_action: np.ndarray, action_dict: Dict[int, List]) -> int:
+    """
+    Look up the integer key (e.g. 32) of a list-based action (e.g. [1,1,1,0]).
+
+    Old_action can be either a node or an ACL action.
+
+    :param old_action: Action expressed as a list of choices, eg. [1,1,1,0]
+    :type old_action: np.ndarray
+    :param action_dict: Dictionary for translating the multidiscrete actions into the list-based actions.
+    :type action_dict: Dict[int,List]
+    :return: Key of the first entry of `action_dict` equal to `old_action`, or 0 when there is none
+    :rtype: int
+    """
+    # Lazily scan the dict in iteration order; the first entry equal to old_action wins
+    matching_keys = (key for key, choices in action_dict.items() if list(choices) == list(old_action))
+
+    # Not all possible actions are included in the dict, only valid actions are;
+    # an action without an entry is therefore an invalid action, so fall back to 0
+    return next(matching_keys, 0)
--- a/src/primaite/cli.py
+++ b/src/primaite/cli.py
@@ -0,0 +1,198 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Provides a CLI using Typer as an entry point."""
+import logging
+import os
+from enum import Enum
+from typing import Optional
+
+import typer
+import yaml
+from typing_extensions import Annotated
+
+from primaite import PRIMAITE_PATHS
+from primaite.data_viz import PlotlyTemplate
+
+app = typer.Typer()
+
+
+@app.command()
+def build_dirs() -> None:
+    """Build the PrimAITE app directories."""
+    from primaite.setup import setup_app_dirs
+
+    setup_app_dirs.run()
+
+
+@app.command()
+def reset_notebooks(overwrite: bool = True) -> None:
+    """
+    Force a reset of the demo notebooks in the users notebooks directory.
+
+    :param overwrite: If True, will overwrite existing demo notebooks.
+    """
+    from primaite.setup import reset_demo_notebooks
+
+    reset_demo_notebooks.run(overwrite)
+
+
+@app.command()
+def logs(last_n: Annotated[int, typer.Option("-n")] = 10) -> None:
+    """
+    Print the last lines of the PrimAITE log file; prints nothing if the file is missing or last_n <= 0.
+
+    :param last_n: The number of lines to print. Default value is 10.
+    """
+    import re
+
+    if last_n <= 0:  # lines[-0:] would be the whole file, not "no lines"
+        return
+    if os.path.isfile(PRIMAITE_PATHS.app_log_file_path):
+        with open(PRIMAITE_PATHS.app_log_file_path) as file:
+            lines = file.readlines()
+        for line in lines[-last_n:]:
+            print(re.sub(r"\n*", "", line))  # drop the line's own newline; print adds one
+
+
+_LogLevel = Enum("LogLevel", {k: k for k in logging._levelToName.values()})  # noqa
+
+
+@app.command()
+def log_level(level: Annotated[Optional[_LogLevel], typer.Argument()] = None) -> None:
+    """
+    View or set the PrimAITE Log Level.
+
+    To View, simply call: primaite log-level
+
+    To set, call: primaite log-level <desired log level>
+
+    For example, to set the level to debug, call: primaite log-level DEBUG
+    """
+    if PRIMAITE_PATHS.app_config_file_path.exists():  # silently a no-op when no config file exists
+        with open(PRIMAITE_PATHS.app_config_file_path, "r") as file:
+            primaite_config = yaml.safe_load(file)
+
+        if level:
+            primaite_config["logging"]["log_level"] = level.value
+            with open(PRIMAITE_PATHS.app_config_file_path, "w") as file:
+                yaml.dump(primaite_config, file)
+            print(f"PrimAITE Log Level: {level.value}")  # .value: the plain name, not "LogLevel.X"
+        else:
+            level = primaite_config["logging"]["log_level"]
+            print(f"PrimAITE Log Level: {level}")
+
+
+@app.command()
+def notebooks() -> None:
+    """Start Jupyter Lab in the users PrimAITE notebooks directory."""
+    from primaite.notebooks import start_jupyter_session
+
+    start_jupyter_session()
+
+
+@app.command()
+def version() -> None:
+    """Get the installed PrimAITE version number."""
+    import primaite
+
+    print(primaite.__version__)
+
+
+@app.command()
+def clean_up() -> None:
+    """Cleans up left over files from previous version installations."""
+    from primaite.setup import old_installation_clean_up
+
+    old_installation_clean_up.run()
+
+
+@app.command()
+def setup(overwrite_existing: bool = True) -> None:
+    """
+    Perform the PrimAITE first-time setup. WARNING: All user-data will be lost.
+
+    :param overwrite_existing: Forwarded to the demo notebook and example config resets; defaults to True.
+    """
+    from primaite import getLogger
+    from primaite.setup import old_installation_clean_up, reset_demo_notebooks, reset_example_configs
+
+    _LOGGER = getLogger(__name__)
+
+    _LOGGER.info("Performing the PrimAITE first-time setup...")
+
+    _LOGGER.info("Building primaite_config.yaml...")  # NOTE(review): no build step follows this message
+
+    _LOGGER.info("Building the PrimAITE app directories...")
+    PRIMAITE_PATHS.mkdirs()
+
+    _LOGGER.info("Rebuilding the demo notebooks...")
+    reset_demo_notebooks.run(overwrite_existing=overwrite_existing)
+
+    _LOGGER.info("Rebuilding the example configs...")
+    reset_example_configs.run(overwrite_existing=overwrite_existing)
+
+    _LOGGER.info("Performing a clean-up of previous PrimAITE installations...")
+    old_installation_clean_up.run()
+
+    _LOGGER.info("PrimAITE setup complete!")
+
+
+@app.command()
+def session(tc: Optional[str] = None, ldc: Optional[str] = None, load: Optional[str] = None) -> None:
+    """
+    Run a PrimAITE session.
+
+    tc: The training config filepath. Optional. If no value is passed then
+    example default training config is used from:
+    ~/primaite/2.0.0/config/example_config/training/training_config_main.yaml.
+
+    ldc: The lay down config file path. Optional. If no value is passed then
+    example default lay down config is used from:
+    ~/primaite/2.0.0/config/example_config/lay_down/lay_down_config_3_doc_very_basic.yaml.
+
+    load: The directory of a previous session. Optional. If no value is passed, then the session
+    will use the default training config and laydown config. Inversely, if a training config and laydown config
+    is passed while a session directory is passed, PrimAITE will load the session and ignore the training config
+    and laydown config.
+    """
+    from primaite.config.lay_down_config import dos_very_basic_config_path
+    from primaite.config.training_config import main_training_config_path
+    from primaite.main import run
+
+    if load is not None:
+        # run a loaded session
+        run(session_path=load)
+
+    else:
+        # start a new session using tc and ldc
+        if not tc:
+            tc = main_training_config_path()
+
+        if not ldc:
+            ldc = dos_very_basic_config_path()
+
+        run(training_config_path=tc, lay_down_config_path=ldc)
+
+
+@app.command()
+def plotly_template(template: Annotated[Optional[PlotlyTemplate], typer.Argument()] = None) -> None:
+    """
+    View or set the plotly template for Session plots.
+
+    To View, simply call: primaite plotly-template
+
+    To set, call: primaite plotly-template <desired template>
+
+    For example, to set as plotly_dark, call: primaite plotly-template PLOTLY_DARK
+    """
+    if PRIMAITE_PATHS.app_config_file_path.exists():
+        with open(PRIMAITE_PATHS.app_config_file_path, "r") as file:
+            primaite_config = yaml.safe_load(file)
+
+        if template:
+            primaite_config["session"]["outputs"]["plots"]["template"] = template.value
+            with open(PRIMAITE_PATHS.app_config_file_path, "w") as file:
+                yaml.dump(primaite_config, file)
+            print(f"PrimAITE plotly template: {template.value}")
+        else:
+            template = primaite_config["session"]["outputs"]["plots"]["template"]
+            print(f"PrimAITE plotly template: {template}")
--- a/src/primaite/common/init.py
+++ b/src/primaite/common/init.py
@@ -1 +1,2 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Objects which are shared between many PrimAITE modules."""
--- a/src/primaite/common/config_values_main.py
+++ b/src/primaite/common/config_values_main.py
@@ -1,90 +0,0 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
-"""The config class."""
-
-
-class config_values_main(object):
-    """Class to hold main config values."""
-
-    def __init__(self):
-        """Init."""
-        # Generic
-        self.agent_identifier = ""  # the agent in use
-        self.num_episodes = 0  # number of episodes to train over
-        self.num_steps = 0  # number of steps in an episode
-        self.time_delay = 0  # delay between steps (ms) - applies to generic agents only
-        self.config_filename_use_case = ""  # the filename for the Use Case config file
-        self.session_type = ""  # the session type to run (TRAINING or EVALUATION)
-
-        # Environment
-        self.observation_space_high_value = (
-            0  # The high value for the observation space
-        )
-
-        # Reward values
-        # Generic
-        self.all_ok = 0
-        # Node Operating State
-        self.off_should_be_on = 0
-        self.off_should_be_resetting = 0
-        self.on_should_be_off = 0
-        self.on_should_be_resetting = 0
-        self.resetting_should_be_on = 0
-        self.resetting_should_be_off = 0
-        self.resetting = 0
-        # Node O/S or Service State
-        self.good_should_be_patching = 0
-        self.good_should_be_compromised = 0
-        self.good_should_be_overwhelmed = 0
-        self.patching_should_be_good = 0
-        self.patching_should_be_compromised = 0
-        self.patching_should_be_overwhelmed = 0
-        self.patching = 0
-        self.compromised_should_be_good = 0
-        self.compromised_should_be_patching = 0
-        self.compromised_should_be_overwhelmed = 0
-        self.compromised = 0
-        self.overwhelmed_should_be_good = 0
-        self.overwhelmed_should_be_patching = 0
-        self.overwhelmed_should_be_compromised = 0
-        self.overwhelmed = 0
-        # Node File System State
-        self.good_should_be_repairing = 0
-        self.good_should_be_restoring = 0
-        self.good_should_be_corrupt = 0
-        self.good_should_be_destroyed = 0
-        self.repairing_should_be_good = 0
-        self.repairing_should_be_restoring = 0
-        self.repairing_should_be_corrupt = 0
-        self.repairing_should_be_destroyed = (
-            0  # Repairing does not fix destroyed state - you need to restore
-        )
-        self.repairing = 0
-        self.restoring_should_be_good = 0
-        self.restoring_should_be_repairing = 0
-        self.restoring_should_be_corrupt = (
-            0  # Not the optimal method (as repair will fix corruption)
-        )
-        self.restoring_should_be_destroyed = 0
-        self.restoring = 0
-        self.corrupt_should_be_good = 0
-        self.corrupt_should_be_repairing = 0
-        self.corrupt_should_be_restoring = 0
-        self.corrupt_should_be_destroyed = 0
-        self.corrupt = 0
-        self.destroyed_should_be_good = 0
-        self.destroyed_should_be_repairing = 0
-        self.destroyed_should_be_restoring = 0
-        self.destroyed_should_be_corrupt = 0
-        self.destroyed = 0
-        self.scanning = 0
-        # IER status
-        self.red_ier_running = 0
-        self.green_ier_blocked = 0
-
-        # Patching / Reset
-        self.os_patching_duration = 0  # The time taken to patch the OS
-        self.node_reset_duration = 0  # The time taken to reset a node (hardware)
-        self.service_patching_duration = 0  # The time taken to patch a service
-        self.file_system_repairing_limit = 0  # The time take to repair a file
-        self.file_system_restoring_limit = 0  # The time take to restore a file
-        self.file_system_scanning_limit = 0  # The time taken to scan the file system
--- a/src/primaite/common/custom_typing.py
+++ b/src/primaite/common/custom_typing.py
@@ -0,0 +1,8 @@
+from typing import Union
+
+from primaite.nodes.active_node import ActiveNode
+from primaite.nodes.passive_node import PassiveNode
+from primaite.nodes.service_node import ServiceNode
+
+NodeUnion = Union[ActiveNode, PassiveNode, ServiceNode]
+"""A Union of ActiveNode, PassiveNode, and ServiceNode."""
--- a/src/primaite/common/enums.py
+++ b/src/primaite/common/enums.py
@@ -1,10 +1,10 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """Enumerations for APE."""

-from enum import Enum
+from enum import Enum, IntEnum


-class TYPE(Enum):
+class NodeType(Enum):
    """Node type enumeration."""

    CCTV = 1
@@ -19,7 +19,7 @@ class TYPE(Enum):
    SERVER = 10


-class PRIORITY(Enum):
+class Priority(Enum):
    """Node priority enumeration."""

    P1 = 1
@@ -29,33 +29,38 @@ class PRIORITY(Enum):
    P5 = 5


-class HARDWARE_STATE(Enum):
+class HardwareState(Enum):
    """Node hardware state enumeration."""

+    NONE = 0
    ON = 1
    OFF = 2
    RESETTING = 3
+    SHUTTING_DOWN = 4
+    BOOTING = 5


-class SOFTWARE_STATE(Enum):
-    """O/S or Service state enumeration."""
+class SoftwareState(Enum):
+    """Software or Service state enumeration."""

+    NONE = 0
    GOOD = 1
    PATCHING = 2
    COMPROMISED = 3
    OVERWHELMED = 4


-class NODE_POL_TYPE(Enum):
+class NodePOLType(Enum):
    """Node Pattern of Life type enumeration."""

+    NONE = 0
    OPERATING = 1
    OS = 2
    SERVICE = 3
    FILE = 4


-class NODE_POL_INITIATOR(Enum):
+class NodePOLInitiator(Enum):
    """Node Pattern of Life initiator enumeration."""

    DIRECT = 1
@@ -63,7 +68,7 @@ class NODE_POL_INITIATOR(Enum):
    SERVICE = 3


-class PROTOCOL(Enum):
+class Protocol(Enum):
    """Service protocol enumeration."""

    LDAP = 0
@@ -76,14 +81,82 @@ class PROTOCOL(Enum):
    NONE = 7


-class ACTION_TYPE(Enum):
+class SessionType(Enum):
+    """The type of PrimAITE Session to be run."""
+
+    TRAIN = 1
+    "Train an agent"
+    EVAL = 2
+    "Evaluate an agent"
+    TRAIN_EVAL = 3
+    "Train then evaluate an agent"
+
+
+class AgentFramework(Enum):
+    """The agent algorithm framework/package."""
+
+    CUSTOM = 0
+    "Custom Agent"
+    SB3 = 1
+    "Stable Baselines3"
+    RLLIB = 2
+    "Ray RLlib"
+
+
+class DeepLearningFramework(Enum):
+    """The deep learning framework."""
+
+    TF = "tf"
+    "Tensorflow"
+    TF2 = "tf2"
+    "Tensorflow 2.x"
+    TORCH = "torch"
+    "PyTorch"
+
+
+class AgentIdentifier(Enum):
+    """The Red Agent algo/class."""
+
+    A2C = 1
+    "Advantage Actor Critic"
+    PPO = 2
+    "Proximal Policy Optimization"
+    HARDCODED = 3
+    "The Hardcoded agents"
+    DO_NOTHING = 4
+    "The DoNothing agents"
+    RANDOM = 5
+    "The RandomAgent"
+    DUMMY = 6
+    "The DummyAgent"
+
+
+class HardCodedAgentView(Enum):
+    """The view the deterministic hard-coded agent has of the environment."""
+
+    BASIC = 1
+    "The current observation space only"
+    FULL = 2
+    "Full environment view with actions taken and reward feedback"
+
+
+class ActionType(Enum):
    """Action type enumeration."""

    NODE = 0
    ACL = 1
+    ANY = 2


-class FILE_SYSTEM_STATE(Enum):
+# TODO: this is not used anymore, write a ticket to delete it.
+class ObservationType(Enum):
+    """Observation type enumeration."""
+
+    BOX = 0
+    MULTIDISCRETE = 1
+
+
+class FileSystemState(Enum):
    """File System State."""

    GOOD = 1
@@ -91,3 +164,45 @@ class FILE_SYSTEM_STATE(Enum):
    DESTROYED = 3
    REPAIRING = 4
    RESTORING = 5
+
+
+class NodeHardwareAction(Enum):
+    """Node hardware action."""
+
+    NONE = 0
+    ON = 1
+    OFF = 2
+    RESET = 3
+
+
+class NodeSoftwareAction(Enum):
+    """Node software action."""
+
+    NONE = 0
+    PATCHING = 1
+
+
+class LinkStatus(Enum):
+    """Link traffic status."""
+
+    NONE = 0
+    LOW = 1
+    MEDIUM = 2
+    HIGH = 3
+    OVERLOAD = 4
+
+
+class SB3OutputVerboseLevel(IntEnum):
+    """The Stable Baselines3 learn/eval output verbosity level."""
+
+    NONE = 0
+    INFO = 1
+    DEBUG = 2
+
+
+class RulePermissionType(Enum):
+    """Any firewall rule type."""
+
+    NONE = 0
+    DENY = 1
+    ALLOW = 2
--- a/src/primaite/common/protocol.py
+++ b/src/primaite/common/protocol.py
@@ -1,21 +1,21 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """The protocol class."""


 class Protocol(object):
    """Protocol class."""

-    def __init__(self, _name):
+    def __init__(self, _name: str) -> None:
        """
-        Init.
+        Initialise a protocol.

-        Args:
-            _name: The protocol name
+        :param _name: The name of the protocol
+        :type _name: str
        """
-        self.name = _name
-        self.load = 0  # bps
+        self.name: str = _name
+        self.load: int = 0  # bps

-    def get_name(self):
+    def get_name(self) -> str:
        """
        Gets the protocol name.

@@ -24,7 +24,7 @@ class Protocol(object):
        """
        return self.name

-    def get_load(self):
+    def get_load(self) -> int:
        """
        Gets the protocol load.

@@ -33,7 +33,7 @@ class Protocol(object):
        """
        return self.load

-    def add_load(self, _load):
+    def add_load(self, _load: int) -> None:
        """
        Adds load to the protocol.

@@ -42,6 +42,6 @@ class Protocol(object):
        """
        self.load += _load

-    def clear_load(self):
+    def clear_load(self) -> None:
        """Clears the load on this protocol."""
        self.load = 0
--- a/src/primaite/common/service.py
+++ b/src/primaite/common/service.py
@@ -1,83 +1,28 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """The Service class."""

-from primaite.common.enums import SOFTWARE_STATE
+from primaite.common.enums import SoftwareState


 class Service(object):
    """Service class."""

-    def __init__(self, _name, _port, _state):
+    def __init__(self, name: str, port: str, software_state: SoftwareState) -> None:
        """
-        Init.
+        Initialise a service.

-        Args:
-            _name: The service name
-            _port: The service port
-            _state: The service state
+        :param name: The service name.
+        :param port: The service port.
+        :param software_state: The service SoftwareState.
        """
-        self.name = _name
-        self.port = _port
-        self.state = _state
-        self.patching_count = 0
+        self.name: str = name
+        self.port: str = port
+        self.software_state: SoftwareState = software_state
+        self.patching_count: int = 0

-    def set_name(self, _name):
-        """
-        Sets the service name.
-
-        Args:
-            _name: The service name
-        """
-        self.name = _name
-
-    def get_name(self):
-        """
-        Gets the service name.
-
-        Returns:
-             The service name
-        """
-        return self.name
-
-    def set_port(self, _port):
-        """
-        Sets the service port.
-
-        Args:
-            _port: The service port
-        """
-        self.port = _port
-
-    def get_port(self):
-        """
-        Gets the service port.
-
-        Returns:
-             The service port
-        """
-        return self.port
-
-    def set_state(self, _state):
-        """
-        Sets the service state.
-
-        Args:
-            _state: The service state
-        """
-        self.state = _state
-
-    def get_state(self):
-        """
-        Gets the service state.
-
-        Returns:
-             The service state
-        """
-        return self.state
-
-    def reduce_patching_count(self):
+    def reduce_patching_count(self) -> None:
        """Reduces the patching count for the service."""
        self.patching_count -= 1
        if self.patching_count <= 0:
            self.patching_count = 0
-            self.state = SOFTWARE_STATE.GOOD
+            self.software_state = SoftwareState.GOOD
--- a/src/primaite/config/init.py
+++ b/src/primaite/config/init.py
@@ -0,0 +1,2 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Configuration parameters for running experiments."""
--- a/src/primaite/config/_package_data/lay_down/lay_down_config_1_DDOS_basic.yaml
+++ b/src/primaite/config/_package_data/lay_down/lay_down_config_1_DDOS_basic.yaml
@@ -0,0 +1,166 @@
+- item_type: PORTS
+  ports_list:
+    - port: '80'
+- item_type: SERVICES
+  service_list:
+    - name: TCP
+- item_type: NODE
+  node_id: '1'
+  name: PC1
+  node_class: SERVICE
+  node_type: COMPUTER
+  priority: P5
+  hardware_state: 'ON'
+  ip_address: 192.168.1.2
+  software_state: GOOD
+  file_system_state: GOOD
+  services:
+    - name: TCP
+      port: '80'
+      state: GOOD
+- item_type: NODE
+  node_id: '2'
+  name: SERVER
+  node_class: SERVICE
+  node_type: SERVER
+  priority: P5
+  hardware_state: 'ON'
+  ip_address: 192.168.1.3
+  software_state: GOOD
+  file_system_state: GOOD
+  services:
+    - name: TCP
+      port: '80'
+      state: GOOD
+- item_type: NODE
+  node_id: '3'
+  name: PC2
+  node_class: SERVICE
+  node_type: COMPUTER
+  priority: P5
+  hardware_state: 'ON'
+  ip_address: 192.168.1.4
+  software_state: GOOD
+  file_system_state: GOOD
+  services:
+    - name: TCP
+      port: '80'
+      state: GOOD
+- item_type: NODE
+  node_id: '4'
+  name: SWITCH1
+  node_class: ACTIVE
+  node_type: SWITCH
+  priority: P2
+  hardware_state: 'ON'
+  ip_address: 192.168.1.5
+  software_state: GOOD
+  file_system_state: GOOD
+- item_type: NODE
+  node_id: '5'
+  name: SWITCH2
+  node_class: ACTIVE
+  node_type: SWITCH
+  priority: P2
+  hardware_state: 'ON'
+  ip_address: 192.168.1.6
+  software_state: GOOD
+  file_system_state: GOOD
+- item_type: NODE
+  node_id: '6'
+  name: SWITCH3
+  node_class: ACTIVE
+  node_type: SWITCH
+  priority: P2
+  hardware_state: 'ON'
+  ip_address: 192.168.1.7
+  software_state: GOOD
+  file_system_state: GOOD
+- item_type: LINK
+  id: '7'
+  name: link1
+  bandwidth: 1000000000
+  source: '1'
+  destination: '4'
+- item_type: LINK
+  id: '8'
+  name: link2
+  bandwidth: 1000000000
+  source: '4'
+  destination: '2'
+- item_type: LINK
+  id: '9'
+  name: link3
+  bandwidth: 1000000000
+  source: '2'
+  destination: '5'
+- item_type: LINK
+  id: '10'
+  name: link4
+  bandwidth: 1000000000
+  source: '2'
+  destination: '6'
+- item_type: LINK
+  id: '11'
+  name: link5
+  bandwidth: 1000000000
+  source: '5'
+  destination: '3'
+- item_type: LINK
+  id: '12'
+  name: link6
+  bandwidth: 1000000000
+  source: '6'
+  destination: '3'
+- item_type: GREEN_IER
+  id: '13'
+  start_step: 1
+  end_step: 128
+  load: 100000
+  protocol: TCP
+  port: '80'
+  source: '3'
+  destination: '2'
+  mission_criticality: 5
+- item_type: RED_POL
+  id: '14'
+  start_step: 50
+  end_step: 50
+  targetNodeId: '1'
+  initiator: DIRECT
+  type: SERVICE
+  protocol: TCP
+  state: COMPROMISED
+  sourceNodeId: NA
+  sourceNodeService: NA
+  sourceNodeServiceState: NA
+- item_type: RED_IER
+  id: '15'
+  start_step: 60
+  end_step: 100
+  load: 1000000
+  protocol: TCP
+  port: '80'
+  source: '1'
+  destination: '2'
+  mission_criticality: 0
+- item_type: RED_POL
+  id: '16'
+  start_step: 80
+  end_step: 80
+  targetNodeId: '2'
+  initiator: IER
+  type: SERVICE
+  protocol: TCP
+  state: COMPROMISED
+  sourceNodeId: NA
+  sourceNodeService: NA
+  sourceNodeServiceState: NA
+- item_type: ACL_RULE
+  id: '17'
+  permission: ALLOW
+  source: ANY
+  destination: ANY
+  protocol: ANY
+  port: ANY
+  position: 0
--- a/src/primaite/config/_package_data/lay_down/lay_down_config_2_DDOS_basic.yaml
+++ b/src/primaite/config/_package_data/lay_down/lay_down_config_2_DDOS_basic.yaml
@@ -1,312 +1,317 @@
- itemType: ACTIONS
-  type: NODE
- itemType: STEPS
-  steps: 128
- itemType: PORTS
-  portsList:
+- item_type: PORTS
+  ports_list:
    - port: '80'
- itemType: SERVICES
-  serviceList:
+- item_type: SERVICES
+  service_list:
    - name: TCP
- itemType: NODE
-  id: '1'
+- item_type: NODE
+  node_id: '1'
  name: PC1
-  baseType: SERVICE
-  nodeType: COMPUTER
+  node_class: SERVICE
+  node_type: COMPUTER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.10.11
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.10.11
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: NODE
-  id: '2'
+- item_type: NODE
+  node_id: '2'
  name: PC2
-  baseType: SERVICE
-  nodeType: COMPUTER
+  node_class: SERVICE
+  node_type: COMPUTER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.10.12
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.10.12
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: NODE
-  id: '3'
+- item_type: NODE
+  node_id: '3'
  name: PC3
-  baseType: SERVICE
-  nodeType: COMPUTER
+  node_class: SERVICE
+  node_type: COMPUTER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.10.13
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.10.13
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: NODE
-  id: '4'
+- item_type: NODE
+  node_id: '4'
  name: PC4
-  baseType: SERVICE
-  nodeType: COMPUTER
+  node_class: SERVICE
+  node_type: COMPUTER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.20.14
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.20.14
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: NODE
-  id: '5'
+- item_type: NODE
+  node_id: '5'
  name: SWITCH1
-  baseType: ACTIVE
-  nodeType: SWITCH
+  node_class: ACTIVE
+  node_type: SWITCH
  priority: P2
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.2
-  softwareState: GOOD
-  fileSystemState: GOOD
- itemType: NODE
-  id: '6'
+  hardware_state: 'ON'
+  ip_address: 192.168.1.2
+  software_state: GOOD
+  file_system_state: GOOD
+- item_type: NODE
+  node_id: '6'
  name: IDS
-  baseType: SERVICE
-  nodeType: SERVER
+  node_class: SERVICE
+  node_type: SERVER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.4
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.1.4
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: NODE
-  id: '7'
+- item_type: NODE
+  node_id: '7'
  name: SWITCH2
-  baseType: ACTIVE
-  nodeType: SWITCH
+  node_class: ACTIVE
+  node_type: SWITCH
  priority: P2
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.3
-  softwareState: GOOD
-  fileSystemState: GOOD
- itemType: NODE
-  id: '8'
+  hardware_state: 'ON'
+  ip_address: 192.168.1.3
+  software_state: GOOD
+  file_system_state: GOOD
+- item_type: NODE
+  node_id: '8'
  name: LOP1
-  baseType: SERVICE
-  nodeType: LOP
+  node_class: SERVICE
+  node_type: LOP
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.12
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.1.12
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: NODE
-  id: '9'
+- item_type: NODE
+  node_id: '9'
  name: SERVER1
-  baseType: SERVICE
-  nodeType: SERVER
+  node_class: SERVICE
+  node_type: SERVER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.10.14
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.10.14
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: NODE
-  id: '10'
+- item_type: NODE
+  node_id: '10'
  name: SERVER2
-  baseType: SERVICE
-  nodeType: SERVER
+  node_class: SERVICE
+  node_type: SERVER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.20.15
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.20.15
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: LINK
+- item_type: LINK
  id: '11'
  name: link1
  bandwidth: 1000000000
  source: '1'
  destination: '5'
- itemType: LINK
+- item_type: LINK
  id: '12'
  name: link2
  bandwidth: 1000000000
  source: '2'
  destination: '5'
- itemType: LINK
+- item_type: LINK
  id: '13'
  name: link3
  bandwidth: 1000000000
  source: '3'
  destination: '5'
- itemType: LINK
+- item_type: LINK
  id: '14'
  name: link4
  bandwidth: 1000000000
  source: '4'
  destination: '5'
- itemType: LINK
+- item_type: LINK
  id: '15'
  name: link5
  bandwidth: 1000000000
  source: '5'
  destination: '6'
- itemType: LINK
+- item_type: LINK
  id: '16'
  name: link6
  bandwidth: 1000000000
  source: '5'
  destination: '8'
- itemType: LINK
+- item_type: LINK
  id: '17'
  name: link7
  bandwidth: 1000000000
  source: '6'
  destination: '7'
- itemType: LINK
+- item_type: LINK
  id: '18'
  name: link8
  bandwidth: 1000000000
  source: '8'
  destination: '7'
- itemType: LINK
+- item_type: LINK
  id: '19'
  name: link9
  bandwidth: 1000000000
  source: '7'
  destination: '9'
- itemType: LINK
+- item_type: LINK
  id: '20'
  name: link10
  bandwidth: 1000000000
  source: '7'
  destination: '10'
- itemType: GREEN_IER
+- item_type: GREEN_IER
  id: '21'
-  startStep: 1
-  endStep: 128
+  start_step: 1
+  end_step: 128
  load: 100000
  protocol: TCP
  port: '80'
  source: '1'
  destination: '9'
-  missionCriticality: 2
- itemType: GREEN_IER
+  mission_criticality: 2
+- item_type: GREEN_IER
  id: '22'
-  startStep: 1
-  endStep: 128
+  start_step: 1
+  end_step: 128
  load: 100000
  protocol: TCP
  port: '80'
  source: '2'
  destination: '9'
-  missionCriticality: 2
- itemType: GREEN_IER
+  mission_criticality: 2
+- item_type: GREEN_IER
  id: '23'
-  startStep: 1
-  endStep: 128
+  start_step: 1
+  end_step: 128
  load: 100000
  protocol: TCP
  port: '80'
  source: '9'
  destination: '3'
-  missionCriticality: 5
- itemType: GREEN_IER
+  mission_criticality: 5
+- item_type: GREEN_IER
  id: '24'
-  startStep: 1
-  endStep: 128
+  start_step: 1
+  end_step: 128
  load: 100000
  protocol: TCP
  port: '80'
  source: '4'
  destination: '10'
-  missionCriticality: 2
- itemType: ACL_RULE
+  mission_criticality: 2
+- item_type: ACL_RULE
  id: '25'
  permission: ALLOW
  source: 192.168.10.11
  destination: 192.168.10.14
  protocol: TCP
  port: 80
- itemType: ACL_RULE
+  position: 0
+- item_type: ACL_RULE
  id: '26'
  permission: ALLOW
  source: 192.168.10.12
  destination: 192.168.10.14
  protocol: TCP
  port: 80
- itemType: ACL_RULE
+  position: 1
+- item_type: ACL_RULE
  id: '27'
  permission: ALLOW
  source: 192.168.10.13
  destination: 192.168.10.14
  protocol: TCP
  port: 80
- itemType: ACL_RULE
+  position: 2
+- item_type: ACL_RULE
  id: '28'
  permission: ALLOW
  source: 192.168.20.14
  destination: 192.168.20.15
  protocol: TCP
  port: 80
- itemType: ACL_RULE
+  position: 3
+- item_type: ACL_RULE
  id: '29'
  permission: ALLOW
  source: 192.168.10.14
  destination: 192.168.10.13
  protocol: TCP
  port: 80
- itemType: ACL_RULE
+  position: 4
+- item_type: ACL_RULE
  id: '30'
  permission: DENY
  source: 192.168.10.11
  destination: 192.168.20.15
  protocol: TCP
  port: 80
- itemType: ACL_RULE
+  position: 5
+- item_type: ACL_RULE
  id: '31'
  permission: DENY
  source: 192.168.10.12
  destination: 192.168.20.15
  protocol: TCP
  port: 80
- itemType: ACL_RULE
+  position: 6
+- item_type: ACL_RULE
  id: '32'
  permission: DENY
  source: 192.168.10.13
  destination: 192.168.20.15
  protocol: TCP
  port: 80
- itemType: ACL_RULE
+  position: 7
+- item_type: ACL_RULE
  id: '33'
  permission: DENY
  source: 192.168.20.14
  destination: 192.168.10.14
  protocol: TCP
  port: 80
- itemType: RED_POL
+  position: 8
+- item_type: RED_POL
  id: '34'
-  startStep: 20
-  endStep: 20
+  start_step: 20
+  end_step: 20
  targetNodeId: '1'
  initiator: DIRECT
  type: SERVICE
@@ -315,10 +320,10 @@
  sourceNodeId: NA
  sourceNodeService: NA
  sourceNodeServiceState: NA
- itemType: RED_POL
+- item_type: RED_POL
  id: '35'
-  startStep: 20
-  endStep: 20
+  start_step: 20
+  end_step: 20
  targetNodeId: '2'
  initiator: DIRECT
  type: SERVICE
@@ -327,30 +332,30 @@
  sourceNodeId: NA
  sourceNodeService: NA
  sourceNodeServiceState: NA
- itemType: RED_IER
+- item_type: RED_IER
  id: '36'
-  startStep: 30
-  endStep: 128
+  start_step: 30
+  end_step: 128
  load: 440000000
  protocol: TCP
  port: '80'
  source: '1'
  destination: '9'
-  missionCriticality: 0
- itemType: RED_IER
+  mission_criticality: 0
+- item_type: RED_IER
  id: '37'
-  startStep: 30
-  endStep: 128
+  start_step: 30
+  end_step: 128
  load: 440000000
  protocol: TCP
  port: '80'
  source: '2'
  destination: '9'
-  missionCriticality: 0
- itemType: RED_POL
+  mission_criticality: 0
+- item_type: RED_POL
  id: '38'
-  startStep: 30
-  endStep: 30
+  start_step: 30
+  end_step: 30
  targetNodeId: '9'
  initiator: IER
  type: SERVICE
--- a/src/primaite/config/_package_data/lay_down/lay_down_config_3_DOS_very_basic.yaml
+++ b/src/primaite/config/_package_data/lay_down/lay_down_config_3_DOS_very_basic.yaml
@@ -1,138 +1,137 @@
- itemType: ACTIONS
-  type: NODE
- itemType: STEPS
-  steps: 256
- itemType: PORTS
-  portsList:
+- item_type: PORTS
+  ports_list:
    - port: '80'
- itemType: SERVICES
-  serviceList:
+- item_type: SERVICES
+  service_list:
    - name: TCP
- itemType: NODE
-  id: '1'
+- item_type: NODE
+  node_id: '1'
  name: PC1
-  baseType: SERVICE
-  nodeType: COMPUTER
+  node_class: SERVICE
+  node_type: COMPUTER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.2
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.1.2
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: NODE
-  id: '2'
+- item_type: NODE
+  node_id: '2'
  name: PC2
-  baseType: SERVICE
-  nodeType: COMPUTER
+  node_class: SERVICE
+  node_type: COMPUTER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.3
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.1.3
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: NODE
-  id: '3'
+- item_type: NODE
+  node_id: '3'
  name: SWITCH1
-  baseType: ACTIVE
-  nodeType: SWITCH
+  node_class: ACTIVE
+  node_type: SWITCH
  priority: P2
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.1
-  softwareState: GOOD
-  fileSystemState: GOOD
- itemType: NODE
-  id: '4'
+  hardware_state: 'ON'
+  ip_address: 192.168.1.1
+  software_state: GOOD
+  file_system_state: GOOD
+- item_type: NODE
+  node_id: '4'
  name: SERVER1
-  baseType: SERVICE
-  nodeType: SERVER
+  node_class: SERVICE
+  node_type: SERVER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.4
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.1.4
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: LINK
+- item_type: LINK
  id: '5'
  name: link1
  bandwidth: 1000000000
  source: '1'
  destination: '3'
- itemType: LINK
+- item_type: LINK
  id: '6'
  name: link2
  bandwidth: 1000000000
  source: '2'
  destination: '3'
- itemType: LINK
+- item_type: LINK
  id: '7'
  name: link3
  bandwidth: 1000000000
  source: '3'
  destination: '4'
- itemType: GREEN_IER
+- item_type: GREEN_IER
  id: '8'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 10000
  protocol: TCP
  port: '80'
  source: '1'
  destination: '4'
-  missionCriticality: 1
- itemType: GREEN_IER
+  mission_criticality: 1
+- item_type: GREEN_IER
  id: '9'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 10000
  protocol: TCP
  port: '80'
  source: '2'
  destination: '4'
-  missionCriticality: 1
- itemType: GREEN_IER
+  mission_criticality: 1
+- item_type: GREEN_IER
  id: '10'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 10000
  protocol: TCP
  port: '80'
  source: '4'
  destination: '2'
-  missionCriticality: 5
- itemType: ACL_RULE
+  mission_criticality: 5
+- item_type: ACL_RULE
  id: '11'
  permission: ALLOW
  source: 192.168.1.2
  destination: 192.168.1.4
  protocol: TCP
  port: 80
- itemType: ACL_RULE
+  position: 0
+- item_type: ACL_RULE
  id: '12'
  permission: ALLOW
  source: 192.168.1.3
  destination: 192.168.1.4
  protocol: TCP
  port: 80
- itemType: ACL_RULE
+  position: 1
+- item_type: ACL_RULE
  id: '13'
  permission: ALLOW
  source: 192.168.1.4
  destination: 192.168.1.3
  protocol: TCP
  port: 80
- itemType: RED_POL
+  position: 2
+- item_type: RED_POL
  id: '14'
-  startStep: 20
-  endStep: 20
+  start_step: 20
+  end_step: 20
  targetNodeId: '1'
  initiator: DIRECT
  type: SERVICE
@@ -141,20 +140,20 @@
  sourceNodeId: NA
  sourceNodeService: NA
  sourceNodeServiceState: NA
- itemType: RED_IER
+- item_type: RED_IER
  id: '15'
-  startStep: 30
-  endStep: 256
+  start_step: 30
+  end_step: 256
  load: 10000000
  protocol: TCP
  port: '80'
  source: '1'
  destination: '4'
-  missionCriticality: 0
- itemType: RED_POL
+  mission_criticality: 0
+- item_type: RED_POL
  id: '16'
-  startStep: 40
-  endStep: 40
+  start_step: 40
+  end_step: 40
  targetNodeId: '4'
  initiator: IER
  type: SERVICE
--- a/src/primaite/config/_package_data/lay_down/lay_down_config_5_data_manipulation.yaml
+++ b/src/primaite/config/_package_data/lay_down/lay_down_config_5_data_manipulation.yaml
@@ -1,27 +1,23 @@
- itemType: ACTIONS
-  type: NODE
- itemType: STEPS
-  steps: 256
- itemType: PORTS
-  portsList:
+- item_type: PORTS
+  ports_list:
    - port: '80'
    - port: '1433'
    - port: '53'
- itemType: SERVICES
-  serviceList:
+- item_type: SERVICES
+  service_list:
    - name: TCP
    - name: TCP_SQL
    - name: UDP
- itemType: NODE
-  id: '1'
+- item_type: NODE
+  node_id: '1'
  name: CLIENT_1
-  baseType: SERVICE
-  nodeType: COMPUTER
+  node_class: SERVICE
+  node_type: COMPUTER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.10.11
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.10.11
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
@@ -29,40 +25,40 @@
    - name: UDP
      port: '53'
      state: GOOD
- itemType: NODE
-  id: '2'
+- item_type: NODE
+  node_id: '2'
  name: CLIENT_2
-  baseType: SERVICE
-  nodeType: COMPUTER
+  node_class: SERVICE
+  node_type: COMPUTER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.10.12
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.10.12
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: NODE
-  id: '3'
+- item_type: NODE
+  node_id: '3'
  name: SWITCH_1
-  baseType: ACTIVE
-  nodeType: SWITCH
+  node_class: ACTIVE
+  node_type: SWITCH
  priority: P2
-  hardwareState: 'ON'
-  ipAddress: 192.168.10.1
-  softwareState: GOOD
-  fileSystemState: GOOD
- itemType: NODE
-  id: '4'
+  hardware_state: 'ON'
+  ip_address: 192.168.10.1
+  software_state: GOOD
+  file_system_state: GOOD
+- item_type: NODE
+  node_id: '4'
  name: SECURITY_SUITE
-  baseType: SERVICE
-  nodeType: SERVER
+  node_class: SERVICE
+  node_type: SERVER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.10
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.1.10
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
@@ -70,16 +66,16 @@
    - name: UDP
      port: '53'
      state: GOOD
- itemType: NODE
-  id: '5'
+- item_type: NODE
+  node_id: '5'
  name: MANAGEMENT_CONSOLE
-  baseType: SERVICE
-  nodeType: SERVER
+  node_class: SERVICE
+  node_type: SERVER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.12
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.1.12
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
@@ -87,26 +83,26 @@
    - name: UDP
      port: '53'
      state: GOOD
- itemType: NODE
-  id: '6'
+- item_type: NODE
+  node_id: '6'
  name: SWITCH_2
-  baseType: ACTIVE
-  nodeType: SWITCH
+  node_class: ACTIVE
+  node_type: SWITCH
  priority: P2
-  hardwareState: 'ON'
-  ipAddress: 192.168.2.1
-  softwareState: GOOD
-  fileSystemState: GOOD
- itemType: NODE
-  id: '7'
+  hardware_state: 'ON'
+  ip_address: 192.168.2.1
+  software_state: GOOD
+  file_system_state: GOOD
+- item_type: NODE
+  node_id: '7'
  name: WEB_SERVER
-  baseType: SERVICE
-  nodeType: SERVER
+  node_class: SERVICE
+  node_type: SERVER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.2.10
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.2.10
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
@@ -114,16 +110,16 @@
    - name: TCP_SQL
      port: '1433'
      state: GOOD
- itemType: NODE
-  id: '8'
+- item_type: NODE
+  node_id: '8'
  name: DATABASE_SERVER
-  baseType: SERVICE
-  nodeType: SERVER
+  node_class: SERVICE
+  node_type: SERVER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.2.14
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.2.14
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
@@ -134,337 +130,354 @@
    - name: UDP
      port: '53'
      state: GOOD
- itemType: NODE
-  id: '9'
+- item_type: NODE
+  node_id: '9'
  name: BACKUP_SERVER
-  baseType: SERVICE
-  nodeType: SERVER
+  node_class: SERVICE
+  node_type: SERVER
  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.2.16
-  softwareState: GOOD
-  fileSystemState: GOOD
+  hardware_state: 'ON'
+  ip_address: 192.168.2.16
+  software_state: GOOD
+  file_system_state: GOOD
  services:
    - name: TCP
      port: '80'
      state: GOOD
- itemType: LINK
+- item_type: LINK
  id: '10'
  name: LINK_1
  bandwidth: 1000000000
  source: '1'
  destination: '3'
- itemType: LINK
+- item_type: LINK
  id: '11'
  name: LINK_2
  bandwidth: 1000000000
  source: '2'
  destination: '3'
- itemType: LINK
+- item_type: LINK
  id: '12'
  name: LINK_3
  bandwidth: 1000000000
  source: '3'
  destination: '4'
- itemType: LINK
+- item_type: LINK
  id: '13'
  name: LINK_4
  bandwidth: 1000000000
  source: '3'
  destination: '5'
- itemType: LINK
+- item_type: LINK
  id: '14'
  name: LINK_5
  bandwidth: 1000000000
  source: '4'
  destination: '6'
- itemType: LINK
+- item_type: LINK
  id: '15'
  name: LINK_6
  bandwidth: 1000000000
  source: '5'
  destination: '6'
- itemType: LINK
+- item_type: LINK
  id: '16'
  name: LINK_7
  bandwidth: 1000000000
  source: '6'
  destination: '7'
- itemType: LINK
+- item_type: LINK
  id: '17'
  name: LINK_8
  bandwidth: 1000000000
  source: '6'
  destination: '8'
- itemType: LINK
+- item_type: LINK
  id: '18'
  name: LINK_9
  bandwidth: 1000000000
  source: '6'
  destination: '9'
- itemType: GREEN_IER
+- item_type: GREEN_IER
  id: '19'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 10000
  protocol: TCP
  port: '80'
  source: '1'
  destination: '7'
-  missionCriticality: 5
- itemType: GREEN_IER
+  mission_criticality: 5
+- item_type: GREEN_IER
  id: '20'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 10000
  protocol: TCP
  port: '80'
  source: '7'
  destination: '1'
-  missionCriticality: 5
- itemType: GREEN_IER
+  mission_criticality: 5
+- item_type: GREEN_IER
  id: '21'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 10000
  protocol: TCP
  port: '80'
  source: '2'
  destination: '7'
-  missionCriticality: 5
- itemType: GREEN_IER
+  mission_criticality: 5
+- item_type: GREEN_IER
  id: '22'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 10000
  protocol: TCP
  port: '80'
  source: '7'
  destination: '2'
-  missionCriticality: 5
- itemType: GREEN_IER
+  mission_criticality: 5
+- item_type: GREEN_IER
  id: '23'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 5000
  protocol: TCP_SQL
  port: '1433'
  source: '7'
  destination: '8'
-  missionCriticality: 5
- itemType: GREEN_IER
+  mission_criticality: 5
+- item_type: GREEN_IER
  id: '24'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 100000
  protocol: TCP_SQL
  port: '1433'
  source: '8'
  destination: '7'
-  missionCriticality: 5
- itemType: GREEN_IER
+  mission_criticality: 5
+- item_type: GREEN_IER
  id: '25'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 50000
  protocol: TCP
  port: '80'
  source: '1'
  destination: '9'
-  missionCriticality: 2
- itemType: GREEN_IER
+  mission_criticality: 2
+- item_type: GREEN_IER
  id: '26'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 50000
  protocol: TCP
  port: '80'
  source: '2'
  destination: '9'
-  missionCriticality: 2
- itemType: GREEN_IER
+  mission_criticality: 2
+- item_type: GREEN_IER
  id: '27'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 5000
  protocol: TCP
  port: '80'
  source: '5'
  destination: '7'
-  missionCriticality: 1
- itemType: GREEN_IER
+  mission_criticality: 1
+- item_type: GREEN_IER
  id: '28'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 5000
  protocol: TCP
  port: '80'
  source: '7'
  destination: '5'
-  missionCriticality: 1
- itemType: GREEN_IER
+  mission_criticality: 1
+- item_type: GREEN_IER
  id: '29'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 5000
  protocol: TCP
  port: '80'
  source: '5'
  destination: '8'
-  missionCriticality: 1
- itemType: GREEN_IER
+  mission_criticality: 1
+- item_type: GREEN_IER
  id: '30'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 5000
  protocol: TCP
  port: '80'
  source: '8'
  destination: '5'
-  missionCriticality: 1
- itemType: GREEN_IER
+  mission_criticality: 1
+- item_type: GREEN_IER
  id: '31'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 5000
  protocol: TCP
  port: '80'
  source: '5'
  destination: '9'
-  missionCriticality: 1
- itemType: GREEN_IER
+  mission_criticality: 1
+- item_type: GREEN_IER
  id: '32'
-  startStep: 1
-  endStep: 256
+  start_step: 1
+  end_step: 256
  load: 5000
  protocol: TCP
  port: '80'
  source: '9'
  destination: '5'
-  missionCriticality: 1
- itemType: ACL_RULE
+  mission_criticality: 1
+- item_type: ACL_RULE
  id: '33'
  permission: ALLOW
  source: 192.168.10.11
  destination: 192.168.2.10
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 0
+- item_type: ACL_RULE
  id: '34'
  permission: ALLOW
  source: 192.168.10.11
  destination: 192.168.2.14
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 1
+- item_type: ACL_RULE
  id: '35'
  permission: ALLOW
  source: 192.168.10.12
  destination: 192.168.2.14
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 2
+- item_type: ACL_RULE
  id: '36'
  permission: ALLOW
  source: 192.168.10.12
  destination: 192.168.2.10
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 3
+- item_type: ACL_RULE
  id: '37'
  permission: ALLOW
  source: 192.168.2.10
  destination: 192.168.10.11
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 4
+- item_type: ACL_RULE
  id: '38'
  permission: ALLOW
  source: 192.168.2.10
  destination: 192.168.10.12
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 5
+- item_type: ACL_RULE
  id: '39'
  permission: ALLOW
  source: 192.168.2.10
  destination: 192.168.2.14
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 6
+- item_type: ACL_RULE
  id: '40'
  permission: ALLOW
  source: 192.168.2.14
  destination: 192.168.2.10
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 7
+- item_type: ACL_RULE
  id: '41'
  permission: ALLOW
  source: 192.168.10.11
  destination: 192.168.2.16
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 8
+- item_type: ACL_RULE
  id: '42'
  permission: ALLOW
  source: 192.168.10.12
  destination: 192.168.2.16
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 9
+- item_type: ACL_RULE
  id: '43'
  permission: ALLOW
  source: 192.168.1.12
  destination: 192.168.2.10
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 10
+- item_type: ACL_RULE
  id: '44'
  permission: ALLOW
  source: 192.168.1.12
  destination: 192.168.2.14
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 11
+- item_type: ACL_RULE
  id: '45'
  permission: ALLOW
  source: 192.168.1.12
  destination: 192.168.2.16
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 12
+- item_type: ACL_RULE
  id: '46'
  permission: ALLOW
  source: 192.168.2.10
  destination: 192.168.1.12
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 13
+- item_type: ACL_RULE
  id: '47'
  permission: ALLOW
  source: 192.168.2.14
  destination: 192.168.1.12
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 14
+- item_type: ACL_RULE
  id: '48'
  permission: ALLOW
  source: 192.168.2.16
  destination: 192.168.1.12
  protocol: ANY
  port: ANY
- itemType: ACL_RULE
+  position: 15
+- item_type: ACL_RULE
  id: '49'
  permission: DENY
  source: ANY
  destination: ANY
  protocol: ANY
  port: ANY
- itemType: RED_POL
+  position: 16
+- item_type: RED_POL
  id: '50'
-  startStep: 50
-  endStep: 50
+  start_step: 50
+  end_step: 50
  targetNodeId: '1'
  initiator: DIRECT
  type: SERVICE
@@ -473,20 +486,20 @@
  sourceNodeId: NA
  sourceNodeService: NA
  sourceNodeServiceState: NA
- itemType: RED_IER
+- item_type: RED_IER
  id: '51'
-  startStep: 75
-  endStep: 105
+  start_step: 75
+  end_step: 105
  load: 10000
  protocol: UDP
  port: '53'
  source: '1'
  destination: '8'
-  missionCriticality: 0
- itemType: RED_POL
+  mission_criticality: 0
+- item_type: RED_POL
  id: '52'
-  startStep: 100
-  endStep: 100
+  start_step: 100
+  end_step: 100
  targetNodeId: '8'
  initiator: IER
  type: SERVICE
@@ -495,10 +508,10 @@
  sourceNodeId: NA
  sourceNodeService: NA
  sourceNodeServiceState: NA
- itemType: RED_POL
+- item_type: RED_POL
  id: '53'
-  startStep: 105
-  endStep: 105
+  start_step: 105
+  end_step: 105
  targetNodeId: '8'
  initiator: SERVICE
  type: FILE
@@ -507,10 +520,10 @@
  sourceNodeId: '8'
  sourceNodeService: UDP
  sourceNodeServiceState: COMPROMISED
- itemType: RED_POL
+- item_type: RED_POL
  id: '54'
-  startStep: 105
-  endStep: 105
+  start_step: 105
+  end_step: 105
  targetNodeId: '8'
  initiator: SERVICE
  type: SERVICE
@@ -519,10 +532,10 @@
  sourceNodeId: '8'
  sourceNodeService: UDP
  sourceNodeServiceState: COMPROMISED
- itemType: RED_POL
+- item_type: RED_POL
  id: '55'
-  startStep: 125
-  endStep: 125
+  start_step: 125
+  end_step: 125
  targetNodeId: '7'
  initiator: SERVICE
  type: SERVICE
--- a/src/primaite/config/_package_data/training/training_config_main.yaml
+++ b/src/primaite/config/_package_data/training/training_config_main.yaml
@@ -0,0 +1,168 @@
+# Training Config File
+
+# Sets which agent algorithm framework will be used.
+# Options are:
+# "SB3" (Stable Baselines3)
+# "RLLIB" (Ray RLlib)
+# "CUSTOM" (Custom Agent)
+agent_framework: SB3
+
+# Sets which deep learning framework will be used (by RLlib ONLY).
+# Default is TF (Tensorflow).
+# Options are:
+# "TF" (Tensorflow)
+# "TF2" (Tensorflow 2.X)
+# "TORCH" (PyTorch)
+deep_learning_framework: TF2
+
+# Sets which Agent class will be used.
+# Options are:
+# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
+# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
+# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
+# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
+# "RANDOM" (primaite.agents.simple.RandomAgent)
+# "DUMMY" (primaite.agents.simple.DummyAgent)
+agent_identifier: PPO
+
+# Sets whether Red Agent POL and IER are randomised.
+# Options are:
+# True
+# False
+random_red_agent: False
+
+# The (integer) seed to be used in random number generation
+# Default is None (null)
+seed: null
+
+# Set whether the agent evaluation will be deterministic instead of stochastic
+# Options are:
+# True
+# False
+deterministic: False
+
+# Sets what view of the environment the deterministic hardcoded agent has. The default is FULL.
+# Options are:
+# "BASIC" (The current observation space only)
+# "FULL" (Full environment view with actions taken and reward feedback)
+hard_coded_agent_view: FULL
+
+# Sets how the action space is defined.
+# Options are:
+# "NODE"
+# "ACL"
+# "ANY" (Both node and ACL actions)
+action_type: ANY
+# The observation space config (whether it is flattened, and which components it is made up of)
+observation_space:
+  flatten: true
+  components:
+    - name: NODE_LINK_TABLE
+    - name: NODE_STATUSES
+    - name: LINK_TRAFFIC_LEVELS
+    - name: ACCESS_CONTROL_LIST
+
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 1
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
+
+# Sets how often the agent will save a checkpoint (every n episodes).
+# Set to 0 if no checkpoints are required. Default (when omitted) is 5
+checkpoint_every_n_episodes: 10
+
+# Time delay (milliseconds) between steps for CUSTOM agents.
+time_delay: 5
+
+# Type of session to be run. Options are:
+# "TRAIN" (Trains an agent)
+# "EVAL" (Evaluates an agent)
+# "TRAIN_EVAL" (Trains then evaluates an agent)
+session_type: TRAIN_EVAL
+
+# Environment config values
+# The high value for the observation space
+observation_space_high_value: 1000000000
+
+# Implicit ACL firewall rule at end of ACL list to be the default action (ALLOW or DENY)
+implicit_acl_rule: DENY
+# Total number of ACL rules allowed in the environment
+max_number_acl_rules: 30
+
+# The Stable Baselines3 learn/eval output verbosity level:
+# Options are:
+# "NONE" (No Output)
+# "INFO" (Info Messages (such as devices and wrappers used))
+# "DEBUG" (All Messages)
+sb3_output_verbose_level: NONE
+
+# Reward values
+# Generic
+all_ok: 0
+# Node Hardware State
+off_should_be_on: -0.001
+off_should_be_resetting: -0.0005
+on_should_be_off: -0.0002
+on_should_be_resetting: -0.0005
+resetting_should_be_on: -0.0005
+resetting_should_be_off: -0.0002
+resetting: -0.0003
+# Node Software or Service State
+good_should_be_patching: 0.0002
+good_should_be_compromised: 0.0005
+good_should_be_overwhelmed: 0.0005
+patching_should_be_good: -0.0005
+patching_should_be_compromised: 0.0002
+patching_should_be_overwhelmed: 0.0002
+patching: -0.0003
+compromised_should_be_good: -0.002
+compromised_should_be_patching: -0.002
+compromised_should_be_overwhelmed: -0.002
+compromised: -0.002
+overwhelmed_should_be_good: -0.002
+overwhelmed_should_be_patching: -0.002
+overwhelmed_should_be_compromised: -0.002
+overwhelmed: -0.002
+# Node File System State
+good_should_be_repairing: 0.0002
+good_should_be_restoring: 0.0002
+good_should_be_corrupt: 0.0005
+good_should_be_destroyed: 0.001
+repairing_should_be_good: -0.0005
+repairing_should_be_restoring: 0.0002
+repairing_should_be_corrupt: 0.0002
+repairing_should_be_destroyed: 0.0000
+repairing: -0.0003
+restoring_should_be_good: -0.001
+restoring_should_be_repairing: -0.0002
+restoring_should_be_corrupt: 0.0001
+restoring_should_be_destroyed: 0.0002
+restoring: -0.0006
+corrupt_should_be_good: -0.001
+corrupt_should_be_repairing: -0.001
+corrupt_should_be_restoring: -0.001
+corrupt_should_be_destroyed: 0.0002
+corrupt: -0.001
+destroyed_should_be_good: -0.002
+destroyed_should_be_repairing: -0.002
+destroyed_should_be_restoring: -0.002
+destroyed_should_be_corrupt: -0.002
+destroyed: -0.002
+scanning: -0.0002
+# IER status
+red_ier_running: -0.0005
+green_ier_blocked: -0.001
+
+# Patching / Reset durations
+os_patching_duration: 5            # The time taken to patch the OS
+node_reset_duration: 5             # The time taken to reset a node (hardware)
+service_patching_duration: 5       # The time taken to patch a service
+file_system_repairing_limit: 5      # The time taken to repair the file system
+file_system_restoring_limit: 5      # The time taken to restore the file system
+file_system_scanning_limit: 5       # The time taken to scan the file system
--- a/src/primaite/config/config_1_DDOS_BASIC.yaml
+++ b/src/primaite/config/config_1_DDOS_BASIC.yaml
@@ -1,169 +0,0 @@
- itemType: ACTIONS
-  type: NODE
- itemType: STEPS
-  steps: 128
- itemType: PORTS
-  portsList:
-    - port: '80'
- itemType: SERVICES
-  serviceList:
-    - name: TCP
- itemType: NODE
-  id: '1'
-  name: PC1
-  baseType: SERVICE
-  nodeType: COMPUTER
-  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.2
-  softwareState: GOOD
-  fileSystemState: GOOD
-  services:
-    - name: TCP
-      port: '80'
-      state: GOOD
- itemType: NODE
-  id: '2'
-  name: SERVER
-  baseType: SERVICE
-  nodeType: SERVER
-  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.3
-  softwareState: GOOD
-  fileSystemState: GOOD
-  services:
-    - name: TCP
-      port: '80'
-      state: GOOD
- itemType: NODE
-  id: '3'
-  name: PC2
-  baseType: SERVICE
-  nodeType: COMPUTER
-  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.4
-  softwareState: GOOD
-  fileSystemState: GOOD
-  services:
-    - name: TCP
-      port: '80'
-      state: GOOD
- itemType: NODE
-  id: '4'
-  name: SWITCH1
-  baseType: ACTIVE
-  nodeType: SWITCH
-  priority: P2
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.5
-  softwareState: GOOD
-  fileSystemState: GOOD
- itemType: NODE
-  id: '5'
-  name: SWITCH2
-  baseType: ACTIVE
-  nodeType: SWITCH
-  priority: P2
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.6
-  softwareState: GOOD
-  fileSystemState: GOOD
- itemType: NODE
-  id: '6'
-  name: SWITCH3
-  baseType: ACTIVE
-  nodeType: SWITCH
-  priority: P2
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.7
-  softwareState: GOOD
-  fileSystemState: GOOD
- itemType: LINK
-  id: '7'
-  name: link1
-  bandwidth: 1000000000
-  source: '1'
-  destination: '4'
- itemType: LINK
-  id: '8'
-  name: link2
-  bandwidth: 1000000000
-  source: '4'
-  destination: '2'
- itemType: LINK
-  id: '9'
-  name: link3
-  bandwidth: 1000000000
-  source: '2'
-  destination: '5'
- itemType: LINK
-  id: '10'
-  name: link4
-  bandwidth: 1000000000
-  source: '2'
-  destination: '6'
- itemType: LINK
-  id: '11'
-  name: link5
-  bandwidth: 1000000000
-  source: '5'
-  destination: '3'
- itemType: LINK
-  id: '12'
-  name: link6
-  bandwidth: 1000000000
-  source: '6'
-  destination: '3'
- itemType: GREEN_IER
-  id: '13'
-  startStep: 1
-  endStep: 128
-  load: 100000
-  protocol: TCP
-  port: '80'
-  source: '3'
-  destination: '2'
-  missionCriticality: 5
- itemType: RED_POL
-  id: '14'
-  startStep: 50
-  endStep: 50
-  targetNodeId: '1'
-  initiator: DIRECT
-  type: SERVICE
-  protocol: TCP
-  state: COMPROMISED
-  sourceNodeId: NA
-  sourceNodeService: NA
-  sourceNodeServiceState: NA
- itemType: RED_IER
-  id: '15'
-  startStep: 60
-  endStep: 100
-  load: 1000000
-  protocol: TCP
-  port: '80'
-  source: '1'
-  destination: '2'
-  missionCriticality: 0
- itemType: RED_POL
-  id: '16'
-  startStep: 80
-  endStep: 80
-  targetNodeId: '2'
-  initiator: IER
-  type: SERVICE
-  protocol: TCP
-  state: COMPROMISED
-  sourceNodeId: NA
-  sourceNodeService: NA
-  sourceNodeServiceState: NA
- itemType: ACL_RULE
-  id: '17'
-  permission: ALLOW
-  source: ANY
-  destination: ANY
-  protocol: ANY
-  port: ANY
--- a/src/primaite/config/config_UNIT_TEST.yaml
+++ b/src/primaite/config/config_UNIT_TEST.yaml
@@ -1,533 +0,0 @@
- itemType: ACTIONS
-  type: NODE
- itemType: STEPS
-  steps: 256
- itemType: PORTS
-  portsList:
-    - port: '80'
-    - port: '1433'
-    - port: '53'
- itemType: SERVICES
-  serviceList:
-    - name: TCP
-    - name: TCP_SQL
-    - name: UDP
- itemType: NODE
-  id: '1'
-  name: CLIENT_1
-  baseType: SERVICE
-  nodeType: COMPUTER
-  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.10.11
-  softwareState: GOOD
-  fileSystemState: GOOD
-  services:
-    - name: TCP
-      port: '80'
-      state: GOOD
-    - name: UDP
-      port: '53'
-      state: GOOD
- itemType: NODE
-  id: '2'
-  name: CLIENT_2
-  baseType: SERVICE
-  nodeType: COMPUTER
-  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.10.12
-  softwareState: GOOD
-  fileSystemState: GOOD
-  services:
-    - name: TCP
-      port: '80'
-      state: GOOD
- itemType: NODE
-  id: '3'
-  name: SWITCH_1
-  baseType: ACTIVE
-  nodeType: SWITCH
-  priority: P2
-  hardwareState: 'ON'
-  ipAddress: 192.168.10.1
-  softwareState: GOOD
-  fileSystemState: GOOD
- itemType: NODE
-  id: '4'
-  name: SECURITY_SUITE
-  baseType: SERVICE
-  nodeType: SERVER
-  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.10
-  softwareState: GOOD
-  fileSystemState: GOOD
-  services:
-    - name: TCP
-      port: '80'
-      state: GOOD
-    - name: UDP
-      port: '53'
-      state: GOOD
- itemType: NODE
-  id: '5'
-  name: MANAGEMENT_CONSOLE
-  baseType: SERVICE
-  nodeType: SERVER
-  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.1.12
-  softwareState: GOOD
-  fileSystemState: GOOD
-  services:
-    - name: TCP
-      port: '80'
-      state: GOOD
-    - name: UDP
-      port: '53'
-      state: GOOD
- itemType: NODE
-  id: '6'
-  name: SWITCH_2
-  baseType: ACTIVE
-  nodeType: SWITCH
-  priority: P2
-  hardwareState: 'ON'
-  ipAddress: 192.168.2.1
-  softwareState: GOOD
-  fileSystemState: GOOD
- itemType: NODE
-  id: '7'
-  name: WEB_SERVER
-  baseType: SERVICE
-  nodeType: SERVER
-  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.2.10
-  softwareState: GOOD
-  fileSystemState: GOOD
-  services:
-    - name: TCP
-      port: '80'
-      state: GOOD
-    - name: TCP_SQL
-      port: '1433'
-      state: GOOD
- itemType: NODE
-  id: '8'
-  name: DATABASE_SERVER
-  baseType: SERVICE
-  nodeType: SERVER
-  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.2.14
-  softwareState: GOOD
-  fileSystemState: GOOD
-  services:
-    - name: TCP
-      port: '80'
-      state: GOOD
-    - name: TCP_SQL
-      port: '1433'
-      state: GOOD
-    - name: UDP
-      port: '53'
-      state: GOOD
- itemType: NODE
-  id: '9'
-  name: BACKUP_SERVER
-  baseType: SERVICE
-  nodeType: SERVER
-  priority: P5
-  hardwareState: 'ON'
-  ipAddress: 192.168.2.16
-  softwareState: GOOD
-  fileSystemState: GOOD
-  services:
-    - name: TCP
-      port: '80'
-      state: GOOD
- itemType: LINK
-  id: '10'
-  name: LINK_1
-  bandwidth: 1000000000
-  source: '1'
-  destination: '3'
- itemType: LINK
-  id: '11'
-  name: LINK_2
-  bandwidth: 1000000000
-  source: '2'
-  destination: '3'
- itemType: LINK
-  id: '12'
-  name: LINK_3
-  bandwidth: 1000000000
-  source: '3'
-  destination: '4'
- itemType: LINK
-  id: '13'
-  name: LINK_4
-  bandwidth: 1000000000
-  source: '3'
-  destination: '5'
- itemType: LINK
-  id: '14'
-  name: LINK_5
-  bandwidth: 1000000000
-  source: '4'
-  destination: '6'
- itemType: LINK
-  id: '15'
-  name: LINK_6
-  bandwidth: 1000000000
-  source: '5'
-  destination: '6'
- itemType: LINK
-  id: '16'
-  name: LINK_7
-  bandwidth: 1000000000
-  source: '6'
-  destination: '7'
- itemType: LINK
-  id: '17'
-  name: LINK_8
-  bandwidth: 1000000000
-  source: '6'
-  destination: '8'
- itemType: LINK
-  id: '18'
-  name: LINK_9
-  bandwidth: 1000000000
-  source: '6'
-  destination: '9'
- itemType: GREEN_IER
-  id: '19'
-  startStep: 1
-  endStep: 256
-  load: 10000
-  protocol: TCP
-  port: '80'
-  source: '1'
-  destination: '7'
-  missionCriticality: 5
- itemType: GREEN_IER
-  id: '20'
-  startStep: 1
-  endStep: 256
-  load: 10000
-  protocol: TCP
-  port: '80'
-  source: '7'
-  destination: '1'
-  missionCriticality: 5
- itemType: GREEN_IER
-  id: '21'
-  startStep: 1
-  endStep: 256
-  load: 10000
-  protocol: TCP
-  port: '80'
-  source: '2'
-  destination: '7'
-  missionCriticality: 5
- itemType: GREEN_IER
-  id: '22'
-  startStep: 1
-  endStep: 256
-  load: 10000
-  protocol: TCP
-  port: '80'
-  source: '7'
-  destination: '2'
-  missionCriticality: 5
- itemType: GREEN_IER
-  id: '23'
-  startStep: 1
-  endStep: 256
-  load: 5000
-  protocol: TCP_SQL
-  port: '1433'
-  source: '7'
-  destination: '8'
-  missionCriticality: 5
- itemType: GREEN_IER
-  id: '24'
-  startStep: 1
-  endStep: 256
-  load: 100000
-  protocol: TCP_SQL
-  port: '1433'
-  source: '8'
-  destination: '7'
-  missionCriticality: 5
- itemType: GREEN_IER
-  id: '25'
-  startStep: 1
-  endStep: 256
-  load: 50000
-  protocol: TCP
-  port: '80'
-  source: '1'
-  destination: '9'
-  missionCriticality: 2
- itemType: GREEN_IER
-  id: '26'
-  startStep: 1
-  endStep: 256
-  load: 50000
-  protocol: TCP
-  port: '80'
-  source: '2'
-  destination: '9'
-  missionCriticality: 2
- itemType: GREEN_IER
-  id: '27'
-  startStep: 1
-  endStep: 256
-  load: 5000
-  protocol: TCP
-  port: '80'
-  source: '5'
-  destination: '7'
-  missionCriticality: 1
- itemType: GREEN_IER
-  id: '28'
-  startStep: 1
-  endStep: 256
-  load: 5000
-  protocol: TCP
-  port: '80'
-  source: '7'
-  destination: '5'
-  missionCriticality: 1
- itemType: GREEN_IER
-  id: '29'
-  startStep: 1
-  endStep: 256
-  load: 5000
-  protocol: TCP
-  port: '80'
-  source: '5'
-  destination: '8'
-  missionCriticality: 1
- itemType: GREEN_IER
-  id: '30'
-  startStep: 1
-  endStep: 256
-  load: 5000
-  protocol: TCP
-  port: '80'
-  source: '8'
-  destination: '5'
-  missionCriticality: 1
- itemType: GREEN_IER
-  id: '31'
-  startStep: 1
-  endStep: 256
-  load: 5000
-  protocol: TCP
-  port: '80'
-  source: '5'
-  destination: '9'
-  missionCriticality: 1
- itemType: GREEN_IER
-  id: '32'
-  startStep: 1
-  endStep: 256
-  load: 5000
-  protocol: TCP
-  port: '80'
-  source: '9'
-  destination: '5'
-  missionCriticality: 1
- itemType: ACL_RULE
-  id: '33'
-  permission: ALLOW
-  source: 192.168.10.11
-  destination: 192.168.2.10
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '34'
-  permission: ALLOW
-  source: 192.168.10.11
-  destination: 192.168.2.14
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '35'
-  permission: ALLOW
-  source: 192.168.10.12
-  destination: 192.168.2.14
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '36'
-  permission: ALLOW
-  source: 192.168.10.12
-  destination: 192.168.2.10
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '37'
-  permission: ALLOW
-  source: 192.168.2.10
-  destination: 192.168.10.11
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '38'
-  permission: ALLOW
-  source: 192.168.2.10
-  destination: 192.168.10.12
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '39'
-  permission: ALLOW
-  source: 192.168.2.10
-  destination: 192.168.2.14
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '40'
-  permission: ALLOW
-  source: 192.168.2.14
-  destination: 192.168.2.10
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '41'
-  permission: ALLOW
-  source: 192.168.10.11
-  destination: 192.168.2.16
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '42'
-  permission: ALLOW
-  source: 192.168.10.12
-  destination: 192.168.2.16
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '43'
-  permission: ALLOW
-  source: 192.168.1.12
-  destination: 192.168.2.10
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '44'
-  permission: ALLOW
-  source: 192.168.1.12
-  destination: 192.168.2.14
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '45'
-  permission: ALLOW
-  source: 192.168.1.12
-  destination: 192.168.2.16
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '46'
-  permission: ALLOW
-  source: 192.168.2.10
-  destination: 192.168.1.12
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '47'
-  permission: ALLOW
-  source: 192.168.2.14
-  destination: 192.168.1.12
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '48'
-  permission: ALLOW
-  source: 192.168.2.16
-  destination: 192.168.1.12
-  protocol: ANY
-  port: ANY
- itemType: ACL_RULE
-  id: '49'
-  permission: DENY
-  source: ANY
-  destination: ANY
-  protocol: ANY
-  port: ANY
- itemType: RED_POL
-  id: '50'
-  startStep: 50
-  endStep: 50
-  targetNodeId: '1'
-  initiator: DIRECT
-  type: SERVICE
-  protocol: UDP
-  state: COMPROMISED
-  sourceNodeId: NA
-  sourceNodeService: NA
-  sourceNodeServiceState: NA
- itemType: RED_IER
-  id: '51'
-  startStep: 75
-  endStep: 105
-  load: 10000
-  protocol: UDP
-  port: '53'
-  source: '1'
-  destination: '8'
-  missionCriticality: 0
- itemType: RED_POL
-  id: '52'
-  startStep: 100
-  endStep: 100
-  targetNodeId: '8'
-  initiator: IER
-  type: SERVICE
-  protocol: UDP
-  state: COMPROMISED
-  sourceNodeId: NA
-  sourceNodeService: NA
-  sourceNodeServiceState: NA
- itemType: RED_POL
-  id: '53'
-  startStep: 105
-  endStep: 105
-  targetNodeId: '8'
-  initiator: SERVICE
-  type: FILE
-  protocol: NA
-  state: CORRUPT
-  sourceNodeId: '8'
-  sourceNodeService: UDP
-  sourceNodeServiceState: COMPROMISED
- itemType: RED_POL
-  id: '54'
-  startStep: 105
-  endStep: 105
-  targetNodeId: '8'
-  initiator: SERVICE
-  type: SERVICE
-  protocol: TCP_SQL
-  state: COMPROMISED
-  sourceNodeId: '8'
-  sourceNodeService: UDP
-  sourceNodeServiceState: COMPROMISED
- itemType: RED_POL
-  id: '55'
-  startStep: 125
-  endStep: 125
-  targetNodeId: '7'
-  initiator: SERVICE
-  type: SERVICE
-  protocol: TCP
-  state: OVERWHELMED
-  sourceNodeId: '8'
-  sourceNodeService: TCP_SQL
-  sourceNodeServiceState: COMPROMISED
--- a/src/primaite/config/lay_down_config.py
+++ b/src/primaite/config/lay_down_config.py
@@ -0,0 +1,112 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+from logging import Logger
+from pathlib import Path
+from typing import Any, Dict, Final, Union
+
+import yaml
+
+from primaite import getLogger, PRIMAITE_PATHS
+
+# Logger for this module, obtained from primaite's getLogger with this module's name.
+_LOGGER: Logger = getLogger(__name__)
+
+# Directory the example lay down config files are looked up in by the
+# *_config_path() helpers in this module (<user config path>/example_config/lay_down).
+_EXAMPLE_LAY_DOWN: Final[Path] = PRIMAITE_PATHS.user_config_path / "example_config" / "lay_down"
+
+
+def convert_legacy_lay_down_config_dict(legacy_config_dict: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Convert a legacy lay down config dict to the new format.
+
+    The conversion is not implemented yet: a warning is logged and the dict
+    that was passed in is handed back unchanged.
+
+    :param legacy_config_dict: A legacy lay down config dict.
+    :return: The same ``legacy_config_dict`` object, unmodified.
+    """
+    # Placeholder behaviour: warn, then return the input as-is.
+    _LOGGER.warning("Legacy lay down config conversion not yet implemented")
+    return legacy_config_dict
+
+
+def load(file_path: Union[str, Path], legacy_file: bool = False) -> Dict:
+    """
+    Load a lay down config from a yaml file.
+
+    When ``legacy_file`` is True the parsed content is passed through
+    :func:`convert_legacy_lay_down_config_dict`. Should that conversion raise
+    a ``KeyError``, the error is logged and the content is returned exactly as
+    it was parsed from the file.
+
+    :param file_path: The config file path, as a ``str`` or a ``Path``.
+    :param legacy_file: True if the config file is legacy format, otherwise False.
+    :return: The lay down config as parsed by ``yaml.safe_load``.
+    :raises ValueError: If the file_path does not exist.
+    """
+    file_path = file_path if isinstance(file_path, Path) else Path(file_path)
+
+    # Guard clause: nothing to read, so log and bail out early.
+    if not file_path.exists():
+        not_found_msg = f"Cannot load the lay down config as it does not exist: {file_path}"
+        _LOGGER.error(not_found_msg)
+        raise ValueError(not_found_msg)
+
+    with open(file_path, "r") as config_file:
+        lay_down = yaml.safe_load(config_file)
+        _LOGGER.debug(f"Loading lay down config file: {file_path}")
+
+    if not legacy_file:
+        return lay_down
+
+    try:
+        lay_down = convert_legacy_lay_down_config_dict(lay_down)
+    except KeyError:
+        # Best effort: keep the content as parsed when it cannot be converted.
+        conversion_msg = (
+            f"Failed to convert lay down config file {file_path} "
+            f"from legacy format. Attempting to use file as is."
+        )
+        _LOGGER.error(conversion_msg)
+    return lay_down
+
+
+def ddos_basic_one_config_path() -> Path:
+    """
+    Get the path of the example ``lay_down_config_1_DDOS_basic.yaml`` file.
+
+    :return: The file path.
+    :raises FileNotFoundError: If the example file does not exist.
+    """
+    example_file = _EXAMPLE_LAY_DOWN / "lay_down_config_1_DDOS_basic.yaml"
+    if example_file.exists():
+        return example_file
+
+    # Missing example file: log at critical level and raise.
+    error_text = "Example config not found. Please run 'primaite setup'"
+    _LOGGER.critical(error_text)
+    raise FileNotFoundError(error_text)
+
+
+def ddos_basic_two_config_path() -> Path:
+    """
+    Get the path of the example ``lay_down_config_2_DDOS_basic.yaml`` file.
+
+    :return: The file path.
+    :raises FileNotFoundError: If the example file does not exist.
+    """
+    example_file = _EXAMPLE_LAY_DOWN / "lay_down_config_2_DDOS_basic.yaml"
+    if example_file.exists():
+        return example_file
+
+    # Missing example file: log at critical level and raise.
+    error_text = "Example config not found. Please run 'primaite setup'"
+    _LOGGER.critical(error_text)
+    raise FileNotFoundError(error_text)
+
+
+def dos_very_basic_config_path() -> Path:
+    """
+    Get the path of the example ``lay_down_config_3_DOS_very_basic.yaml`` file.
+
+    :return: The file path.
+    :raises FileNotFoundError: If the example file does not exist.
+    """
+    example_file = _EXAMPLE_LAY_DOWN / "lay_down_config_3_DOS_very_basic.yaml"
+    if example_file.exists():
+        return example_file
+
+    # Missing example file: log at critical level and raise.
+    error_text = "Example config not found. Please run 'primaite setup'"
+    _LOGGER.critical(error_text)
+    raise FileNotFoundError(error_text)
+
+
+def data_manipulation_config_path() -> Path:
+    """
+    Get the path of the example ``lay_down_config_5_data_manipulation.yaml`` file.
+
+    :return: The file path.
+    :raises FileNotFoundError: If the example file does not exist.
+    """
+    example_file = _EXAMPLE_LAY_DOWN / "lay_down_config_5_data_manipulation.yaml"
+    if example_file.exists():
+        return example_file
+
+    # Missing example file: log at critical level and raise.
+    error_text = "Example config not found. Please run 'primaite setup'"
+    _LOGGER.critical(error_text)
+    raise FileNotFoundError(error_text)
--- a/src/primaite/config/training_config.py
+++ b/src/primaite/config/training_config.py
@@ -0,0 +1,424 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from logging import Logger
+from pathlib import Path
+from typing import Any, Dict, Final, Optional, Union
+
+import yaml
+
+from primaite import getLogger, PRIMAITE_PATHS
+from primaite.common.enums import (
+    ActionType,
+    AgentFramework,
+    AgentIdentifier,
+    DeepLearningFramework,
+    HardCodedAgentView,
+    RulePermissionType,
+    SB3OutputVerboseLevel,
+    SessionType,
+)
+
+# Logger for this module, obtained from primaite's getLogger with this module's name.
+_LOGGER: Logger = getLogger(__name__)
+
+# Directory that main_training_config_path() looks in for the example training
+# config file (<user config path>/example_config/training).
+_EXAMPLE_TRAINING: Final[Path] = PRIMAITE_PATHS.user_config_path / "example_config" / "training"
+
+
+def main_training_config_path() -> Path:
+    """
+    Get the path of the example ``training_config_main.yaml`` file.
+
+    :return: The file path.
+    :raises FileNotFoundError: If the example file does not exist.
+    """
+    example_file = _EXAMPLE_TRAINING / "training_config_main.yaml"
+    if example_file.exists():
+        return example_file
+
+    # Missing example file: log at critical level and raise.
+    error_text = "Example config not found. Please run 'primaite setup'"
+    _LOGGER.critical(error_text)
+    raise FileNotFoundError(error_text)
+
+
+@dataclass()
+class TrainingConfig:
+    """The Training Config class."""
+
+    agent_framework: AgentFramework = AgentFramework.SB3
+    "The AgentFramework"
+
+    deep_learning_framework: DeepLearningFramework = DeepLearningFramework.TF
+    "The DeepLearningFramework"
+
+    agent_identifier: AgentIdentifier = AgentIdentifier.PPO
+    "The AgentIdentifier"
+
+    hard_coded_agent_view: HardCodedAgentView = HardCodedAgentView.FULL
+    "The view the deterministic hard-coded agent has of the environment"
+
+    random_red_agent: bool = False
+    "Creates Random Red Agent Attacks"
+
+    action_type: ActionType = ActionType.ANY
+    "The ActionType to use"
+
+    num_train_episodes: int = 10
+    "The number of episodes to train over during an training session"
+
+    num_train_steps: int = 256
+    "The number of steps in an episode during an training session"
+
+    num_eval_episodes: int = 1
+    "The number of episodes to train over during an evaluation session"
+
+    num_eval_steps: int = 256
+    "The number of steps in an episode during an evaluation session"
+
+    checkpoint_every_n_episodes: int = 5
+    "The agent will save a checkpoint every n episodes"
+
+    observation_space: dict = field(default_factory=lambda: {"components": [{"name": "NODE_LINK_TABLE"}]})
+    "The observation space config dict"
+
+    time_delay: int = 10
+    "The delay between steps (ms). Applies to generic agents only"
+
+    # file
+    session_type: SessionType = SessionType.TRAIN
+    "The type of PrimAITE session to run"
+
+    load_agent: bool = False
+    "Determine whether to load an agent from file"
+
+    agent_load_file: Optional[str] = None
+    "File path and file name of agent if you're loading one in"
+
+    # Environment
+    observation_space_high_value: int = 1000000000
+    "The high value for the observation space"
+
+    sb3_output_verbose_level: SB3OutputVerboseLevel = SB3OutputVerboseLevel.NONE
+    "Stable Baselines3 learn/eval output verbosity level"
+
+    implicit_acl_rule: RulePermissionType = RulePermissionType.DENY
+    "ALLOW or DENY implicit firewall rule to go at the end of list of ACL list."
+
+    max_number_acl_rules: int = 30
+    "Sets a limit for number of acl rules allowed in the list and environment."
+
+    # Reward values
+    # Generic
+    all_ok: float = 0
+
+    # Node Hardware State
+    off_should_be_on: float = -0.001
+    off_should_be_resetting: float = -0.0005
+    on_should_be_off: float = -0.0002
+    on_should_be_resetting: float = -0.0005
+    resetting_should_be_on: float = -0.0005
+    resetting_should_be_off: float = -0.0002
+    resetting: float = -0.0003
+
+    # Node Software or Service State
+    good_should_be_patching: float = 0.0002
+    good_should_be_compromised: float = 0.0005
+    good_should_be_overwhelmed: float = 0.0005
+    patching_should_be_good: float = -0.0005
+    patching_should_be_compromised: float = 0.0002
+    patching_should_be_overwhelmed: float = 0.0002
+    patching: float = -0.0003
+    compromised_should_be_good: float = -0.002
+    compromised_should_be_patching: float = -0.002
+    compromised_should_be_overwhelmed: float = -0.002
+    compromised: float = -0.002
+    overwhelmed_should_be_good: float = -0.002
+    overwhelmed_should_be_patching: float = -0.002
+    overwhelmed_should_be_compromised: float = -0.002
+    overwhelmed: float = -0.002
+
+    # Node File System State
+    good_should_be_repairing: float = 0.0002
+    good_should_be_restoring: float = 0.0002
+    good_should_be_corrupt: float = 0.0005
+    good_should_be_destroyed: float = 0.001
+    repairing_should_be_good: float = -0.0005
+    repairing_should_be_restoring: float = 0.0002
+    repairing_should_be_corrupt: float = 0.0002
+    repairing_should_be_destroyed: float = 0.0000
+    repairing: float = -0.0003
+    restoring_should_be_good: float = -0.001
+    restoring_should_be_repairing: float = -0.0002
+    restoring_should_be_corrupt: float = 0.0001
+    restoring_should_be_destroyed: float = 0.0002
+    restoring: float = -0.0006
+    corrupt_should_be_good: float = -0.001
+    corrupt_should_be_repairing: float = -0.001
+    corrupt_should_be_restoring: float = -0.001
+    corrupt_should_be_destroyed: float = 0.0002
+    corrupt: float = -0.001
+    destroyed_should_be_good: float = -0.002
+    destroyed_should_be_repairing: float = -0.002
+    destroyed_should_be_restoring: float = -0.002
+    destroyed_should_be_corrupt: float = -0.002
+    destroyed: float = -0.002
+    scanning: float = -0.0002
+
+    # IER status
+    red_ier_running: float = -0.0005
+    green_ier_blocked: float = -0.001
+
+    # Patching / Reset durations
+    os_patching_duration: int = 5
+    "The time taken to patch the OS"
+
+    node_reset_duration: int = 5
+    "The time taken to reset a node (hardware)"
+
+    node_booting_duration: int = 3
+    "The Time taken to turn on the node"
+
+    node_shutdown_duration: int = 2
+    "The time taken to turn off the node"
+
+    service_patching_duration: int = 5
+    "The time taken to patch a service"
+
+    file_system_repairing_limit: int = 5
+    "The time take to repair the file system"
+
+    file_system_restoring_limit: int = 5
+    "The time take to restore the file system"
+
+    file_system_scanning_limit: int = 5
+    "The time taken to scan the file system"
+
+    deterministic: bool = False
+    "If true, the training will be deterministic"
+
+    seed: Optional[int] = None
+    "The random number generator seed to be used while training the agent"
+
+    @classmethod
+    def from_dict(cls, config_dict: Dict[str, Any]) -> TrainingConfig:
+        """
+        Create an instance of TrainingConfig from a dict.
+
+        :param config_dict: The training config dict.
+        :return: The instance of TrainingConfig.
+        """
+        field_enum_map = {
+            "agent_framework": AgentFramework,
+            "deep_learning_framework": DeepLearningFramework,
+            "agent_identifier": AgentIdentifier,
+            "action_type": ActionType,
+            "session_type": SessionType,
+            "sb3_output_verbose_level": SB3OutputVerboseLevel,
+            "hard_coded_agent_view": HardCodedAgentView,
+            "implicit_acl_rule": RulePermissionType,
+        }
+
+        # convert the string representation of enums into the actual enum values themselves?
+        for key, value in field_enum_map.items():
+            if key in config_dict:
+                config_dict[key] = value[config_dict[key]]
+
+        return TrainingConfig(**config_dict)
+
+    def to_dict(self, json_serializable: bool = True) -> Dict:
+        """
+        Serialise the ``TrainingConfig`` as dict.
+
+        The returned dict is a shallow copy of the instance attributes, so
+        building it never alters the ``TrainingConfig`` itself.
+
+        :param json_serializable: If True, Enums are converted to their
+            string name.
+        :return: The ``TrainingConfig`` as a dict.
+        """
+        # Copy first: ``self.__dict__`` IS the instance state, so writing the
+        # enum names into it directly would replace the instance's enum fields
+        # with strings and make any later ``.name`` access fail.
+        data = dict(self.__dict__)
+        if json_serializable:
+            enum_fields = (
+                "agent_framework",
+                "deep_learning_framework",
+                "agent_identifier",
+                "action_type",
+                "sb3_output_verbose_level",
+                "session_type",
+                "hard_coded_agent_view",
+                "implicit_acl_rule",
+            )
+            for enum_field in enum_fields:
+                data[enum_field] = getattr(self, enum_field).name
+
+        return data
+
+    def __str__(self) -> str:
+        """
+        Build a one-line, comma-separated summary of the training config.
+
+        The summary lists the agent framework, the deep learning framework
+        (RLlib only), the agent identifier, the hard coded agent view
+        (hard coded agents only), the action type, the names of the
+        observation space components, and the episode/step counts that apply
+        to the session type.
+
+        :return: The summary string.
+        """
+        obs_str = ",".join([c["name"] for c in self.observation_space["components"]])
+        tc = f"{self.agent_framework}, "
+        if self.agent_framework is AgentFramework.RLLIB:
+            tc += f"{self.deep_learning_framework}, "
+        tc += f"{self.agent_identifier}, "
+        if self.agent_identifier is AgentIdentifier.HARDCODED:
+            tc += f"{self.hard_coded_agent_view}, "
+        tc += f"{self.action_type}, "
+        tc += f"observation_space={obs_str}, "
+        if self.session_type is SessionType.TRAIN:
+            tc += f"{self.num_train_episodes} episodes @ "
+            tc += f"{self.num_train_steps} steps"
+        elif self.session_type is SessionType.EVAL:
+            tc += f"{self.num_eval_episodes} episodes @ "
+            tc += f"{self.num_eval_steps} steps"
+        else:
+            # Any other session type reports both phases. The training phase
+            # must show the *train* counts, and the two phases need a
+            # separator so they do not run together.
+            tc += f"Training: {self.num_train_episodes} episodes @ "
+            tc += f"{self.num_train_steps} steps, "
+            tc += f"Evaluation: {self.num_eval_episodes} episodes @ "
+            tc += f"{self.num_eval_steps} steps"
+        return tc
+
+
+def load(file_path: Union[str, Path], legacy_file: bool = False) -> TrainingConfig:
+    """
+    Build a ``TrainingConfig`` from a training config yaml file.
+
+    When ``legacy_file`` is True the loaded dict is first converted from the
+    legacy key format. If that conversion raises a ``KeyError`` the failure
+    is logged and the dict is passed on as it is.
+
+    :param file_path: The config file path.
+    :param legacy_file: True if the config file is legacy format, otherwise
+        False.
+    :return: An instance of
+        :class:`~primaite.config.training_config.TrainingConfig`.
+    :raises ValueError: If the file_path does not exist.
+    :raises TypeError: When the TrainingConfig object cannot be created
+        using the values from the config file read from ``file_path``.
+    """
+    file_path = file_path if isinstance(file_path, Path) else Path(file_path)
+
+    # Guard clause: nothing to load when the file is missing.
+    if not file_path.exists():
+        msg = f"Cannot load the training config as it does not exist: {file_path}"
+        _LOGGER.error(msg)
+        raise ValueError(msg)
+
+    with open(file_path, "r") as config_file:
+        config_dict = yaml.safe_load(config_file)
+        _LOGGER.debug(f"Loading training config file: {file_path}")
+
+    if legacy_file:
+        try:
+            config_dict = convert_legacy_training_config_dict(config_dict)
+        except KeyError:
+            _LOGGER.error(
+                f"Failed to convert training config file {file_path} "
+                f"from legacy format. Attempting to use file as is."
+            )
+
+    try:
+        return TrainingConfig.from_dict(config_dict)
+    except TypeError as error:
+        msg = f"Error when creating an instance of {TrainingConfig} " f"from the training config file {file_path}"
+        _LOGGER.critical(msg, exc_info=True)
+        raise error
+
+
+def convert_legacy_training_config_dict(
+    legacy_config_dict: Dict[str, Any],
+    agent_framework: AgentFramework = AgentFramework.SB3,
+    agent_identifier: AgentIdentifier = AgentIdentifier.PPO,
+    action_type: ActionType = ActionType.ANY,
+    num_train_steps: int = 256,
+) -> Dict[str, Any]:
+    """
+    Convert a legacy training config dict to the new format.
+
+    The supplied ``legacy_config_dict`` is left unmodified, so a caller can
+    still fall back to it if the conversion fails.
+
+    :param legacy_config_dict: A legacy training config dict.
+    :param agent_framework: The agent framework to use as legacy training
+        configs don't have agent_framework values.
+    :param agent_identifier: The agent identifier to use as legacy
+        training configs don't have agent_identifier values.
+    :param action_type: The action space type to set as legacy training configs
+        don't have action_type values.
+    :param num_train_steps: The number of steps to set as legacy training configs
+        don't have num_train_steps values. A ``numSteps`` entry in the legacy
+        dict takes precedence over this value.
+    :return: The converted training config dict.
+    :raises KeyError: If ``sessionType`` is missing or holds an unknown value,
+        or if the legacy dict contains a key that has no known mapping.
+    """
+    config_dict = {
+        "agent_framework": agent_framework.name,
+        "agent_identifier": agent_identifier.name,
+        "action_type": action_type.name,
+        "num_train_steps": num_train_steps,
+        "sb3_output_verbose_level": SB3OutputVerboseLevel.INFO.name,
+    }
+    session_type_map = {"TRAINING": "TRAIN", "EVALUATION": "EVAL"}
+    # Resolve the session type up front (so a bad or missing value still fails
+    # before anything else), but keep it in a local rather than writing it
+    # back into the caller's dict.
+    session_type = session_type_map[legacy_config_dict["sessionType"]]
+    for legacy_key, value in legacy_config_dict.items():
+        new_key = _get_new_key_from_legacy(legacy_key)
+        if new_key:
+            if legacy_key == "sessionType":
+                value = session_type
+            config_dict[new_key] = value
+    return config_dict
+
+
+def _get_new_key_from_legacy(legacy_key: str) -> Optional[str]:
+    """
+    Translate a legacy (camelCase) training config key into its new key.
+
+    :param legacy_key: A legacy training config key.
+    :return: The new-format key, or ``None`` for legacy keys that have no
+        counterpart in the new format.
+    :raises KeyError: If ``legacy_key`` is not a known legacy key.
+    """
+    # Legacy keys that are recognised but have no new-format equivalent.
+    if legacy_key in ("agentIdentifier", "configFilename"):
+        return None
+
+    legacy_to_new = {
+        # Session settings
+        "numEpisodes": "num_train_episodes",
+        "numSteps": "num_train_steps",
+        "timeDelay": "time_delay",
+        "sessionType": "session_type",
+        "loadAgent": "load_agent",
+        "agentLoadFile": "agent_load_file",
+        "observationSpaceHighValue": "observation_space_high_value",
+        "allOk": "all_ok",
+        # off / on / resetting keys
+        "offShouldBeOn": "off_should_be_on",
+        "offShouldBeResetting": "off_should_be_resetting",
+        "onShouldBeOff": "on_should_be_off",
+        "onShouldBeResetting": "on_should_be_resetting",
+        "resettingShouldBeOn": "resetting_should_be_on",
+        "resettingShouldBeOff": "resetting_should_be_off",
+        "resetting": "resetting",
+        # good / patching / compromised / overwhelmed keys
+        "goodShouldBePatching": "good_should_be_patching",
+        "goodShouldBeCompromised": "good_should_be_compromised",
+        "goodShouldBeOverwhelmed": "good_should_be_overwhelmed",
+        "patchingShouldBeGood": "patching_should_be_good",
+        "patchingShouldBeCompromised": "patching_should_be_compromised",
+        "patchingShouldBeOverwhelmed": "patching_should_be_overwhelmed",
+        "patching": "patching",
+        "compromisedShouldBeGood": "compromised_should_be_good",
+        "compromisedShouldBePatching": "compromised_should_be_patching",
+        "compromisedShouldBeOverwhelmed": "compromised_should_be_overwhelmed",
+        "compromised": "compromised",
+        "overwhelmedShouldBeGood": "overwhelmed_should_be_good",
+        "overwhelmedShouldBePatching": "overwhelmed_should_be_patching",
+        "overwhelmedShouldBeCompromised": "overwhelmed_should_be_compromised",
+        "overwhelmed": "overwhelmed",
+        # good / repairing / restoring / corrupt / destroyed keys
+        "goodShouldBeRepairing": "good_should_be_repairing",
+        "goodShouldBeRestoring": "good_should_be_restoring",
+        "goodShouldBeCorrupt": "good_should_be_corrupt",
+        "goodShouldBeDestroyed": "good_should_be_destroyed",
+        "repairingShouldBeGood": "repairing_should_be_good",
+        "repairingShouldBeRestoring": "repairing_should_be_restoring",
+        "repairingShouldBeCorrupt": "repairing_should_be_corrupt",
+        "repairingShouldBeDestroyed": "repairing_should_be_destroyed",
+        "repairing": "repairing",
+        "restoringShouldBeGood": "restoring_should_be_good",
+        "restoringShouldBeRepairing": "restoring_should_be_repairing",
+        "restoringShouldBeCorrupt": "restoring_should_be_corrupt",
+        "restoringShouldBeDestroyed": "restoring_should_be_destroyed",
+        "restoring": "restoring",
+        "corruptShouldBeGood": "corrupt_should_be_good",
+        "corruptShouldBeRepairing": "corrupt_should_be_repairing",
+        "corruptShouldBeRestoring": "corrupt_should_be_restoring",
+        "corruptShouldBeDestroyed": "corrupt_should_be_destroyed",
+        "corrupt": "corrupt",
+        "destroyedShouldBeGood": "destroyed_should_be_good",
+        "destroyedShouldBeRepairing": "destroyed_should_be_repairing",
+        "destroyedShouldBeRestoring": "destroyed_should_be_restoring",
+        "destroyedShouldBeCorrupt": "destroyed_should_be_corrupt",
+        "destroyed": "destroyed",
+        "scanning": "scanning",
+        # IER keys
+        "redIerRunning": "red_ier_running",
+        "greenIerBlocked": "green_ier_blocked",
+        # Durations and limits
+        "osPatchingDuration": "os_patching_duration",
+        "nodeResetDuration": "node_reset_duration",
+        "nodeBootingDuration": "node_booting_duration",
+        "nodeShutdownDuration": "node_shutdown_duration",
+        "servicePatchingDuration": "service_patching_duration",
+        "fileSystemRepairingLimit": "file_system_repairing_limit",
+        "fileSystemRestoringLimit": "file_system_restoring_limit",
+        "fileSystemScanningLimit": "file_system_scanning_limit",
+    }
+    # Plain indexing (not .get) so unknown keys still raise KeyError.
+    return legacy_to_new[legacy_key]
--- a/src/primaite/data_viz/init.py
+++ b/src/primaite/data_viz/init.py
@@ -0,0 +1,15 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Utility to generate plots of sessions metrics after PrimAITE."""
+from enum import Enum
+
+
+class PlotlyTemplate(Enum):
+    """
+    The built-in plotly templates.
+
+    Each member's value is the template name as a lowercase string.
+    """
+
+    PLOTLY = "plotly"
+    PLOTLY_WHITE = "plotly_white"
+    PLOTLY_DARK = "plotly_dark"
+    GGPLOT2 = "ggplot2"
+    SEABORN = "seaborn"
+    SIMPLE_WHITE = "simple_white"
+    # The value is the literal string "none", not Python's ``None``.
+    NONE = "none"
--- a/src/primaite/data_viz/session_plots.py
+++ b/src/primaite/data_viz/session_plots.py
@@ -0,0 +1,73 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+from pathlib import Path
+from typing import Dict, Optional, Union
+
+import plotly.graph_objects as go
+import polars as pl
+import yaml
+from plotly.graph_objs import Figure
+
+from primaite import PRIMAITE_PATHS
+
+
+def get_plotly_config() -> Dict:
+    """
+    Read the plot settings from the PrimAITE application config file.
+
+    :return: The value stored under ``session`` -> ``outputs`` -> ``plots``
+        in the yaml file at ``PRIMAITE_PATHS.app_config_file_path``.
+    """
+    with open(PRIMAITE_PATHS.app_config_file_path, "r") as config_file:
+        app_config = yaml.safe_load(config_file)
+    plots_config = app_config["session"]["outputs"]["plots"]
+    return plots_config
+
+
+def plot_av_reward_per_episode(
+    av_reward_per_episode_csv: Union[str, Path],
+    title: Optional[str] = None,
+    subtitle: Optional[str] = None,
+) -> Figure:
+    """
+    Plot the average reward per episode from a csv session output.
+
+    The csv is read for its ``Episode`` and ``Average Reward`` columns. Size,
+    template and range slider settings come from :func:`get_plotly_config`.
+
+    :param av_reward_per_episode_csv: The average reward per episode csv
+        file path.
+    :param title: The plot title. This is optional.
+    :param subtitle: The plot subtitle. This is optional. When given together
+        with ``title`` it is rendered on a second line in superscript
+        styling; when given alone it is used as the title.
+    :return: The plot as an instance of ``plotly.graph_objs._figure.Figure``.
+    """
+    df = pl.read_csv(av_reward_per_episode_csv)
+
+    if subtitle:
+        if title:
+            # The <sup> tag must be opened as well as closed, otherwise the
+            # title markup is malformed.
+            title = f"{title} <br><sup>{subtitle}</sup>"
+        else:
+            title = subtitle
+
+    config = get_plotly_config()
+    layout = go.Layout(
+        autosize=config["size"]["auto_size"],
+        width=config["size"]["width"],
+        height=config["size"]["height"],
+    )
+    # Create the line graph with a colored line
+    fig = go.Figure(layout=layout)
+    fig.update_layout(template=config["template"])
+    fig.add_trace(
+        go.Scatter(
+            x=df["Episode"],
+            y=df["Average Reward"],
+            mode="lines",
+            name="Mean Reward per Episode",
+        )
+    )
+
+    # Set the layout of the graph
+    fig.update_layout(
+        xaxis={
+            "title": "Episode",
+            "type": "linear",
+            "rangeslider": {"visible": config["range_slider"]},
+        },
+        yaxis={"title": "Average Reward"},
+        title=title,
+        showlegend=False,
+    )
+
+    return fig
--- a/src/primaite/environment/init.py
+++ b/src/primaite/environment/init.py
@@ -1 +1,2 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Gym/Gymnasium environment for RL agents consisting of a simulated computer network."""
--- a/src/primaite/environment/observations.py
+++ b/src/primaite/environment/observations.py
@@ -0,0 +1,735 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Module for handling configurable observation spaces in PrimAITE."""
+import logging
+from abc import ABC, abstractmethod
+from logging import Logger
+from typing import Dict, Final, List, Tuple, TYPE_CHECKING, Union
+
+import numpy as np
+from gym import spaces
+
+from primaite.acl.acl_rule import ACLRule
+from primaite.common.enums import FileSystemState, HardwareState, RulePermissionType, SoftwareState
+from primaite.nodes.active_node import ActiveNode
+from primaite.nodes.service_node import ServiceNode
+
+# This dependency is only needed for type hints,
+# TYPE_CHECKING is False at runtime and True when typecheckers are performing typechecking
+# Therefore, this avoids circular dependency problem.
+if TYPE_CHECKING:
+    from primaite.environment.primaite_env import Primaite
+
+
+# Module-level logger, named after this module via ``__name__``.
+_LOGGER: Logger = logging.getLogger(__name__)
+
+
+class AbstractObservationComponent(ABC):
+    """
+    Represents a part of the PrimAITE observation space.
+
+    Concrete components are expected to call ``super().__init__(env)`` and
+    then set ``space``, ``current_observation`` and ``structure``.
+    """
+
+    @abstractmethod
+    def __init__(self, env: "Primaite") -> None:
+        """
+        Initialise observation component.
+
+        :param env: Primaite training environment.
+        :type env: Primaite
+        """
+        _LOGGER.info(f"Initialising {self} observation component")
+        self.env: "Primaite" = env
+        # The attributes below are only declared here; subclasses assign them.
+        self.space: spaces.Space
+        self.current_observation: np.ndarray  # type might be too restrictive?
+        self.structure: List[str]
+        # No return statement: __init__ is annotated ``-> None`` and Python
+        # requires __init__ to return None when it is used to construct an
+        # instance.
+
+    @abstractmethod
+    def update(self) -> None:
+        """Update the observation based on the current state of the environment."""
+        self.current_observation = NotImplemented
+
+    @abstractmethod
+    def generate_structure(self) -> List[str]:
+        """Return a list of labels for the components of the flattened observation space."""
+        return NotImplemented
+
+
+class NodeLinkTable(AbstractObservationComponent):
+    """
+    Integer table observation: one row per node, then one row per link.
+
+    Every row has 4 fixed columns followed by one column per service:
+
+        * node/link ID
+        * node hardware status / 0 for links
+        * node operating system status (if active/service) / 0 for links
+        * node file system status (active/service only) / 0 for links
+        * node service1 status / traffic load from that service for links
+        * node service2 status / traffic load from that service for links
+        * ...
+        * node serviceN status / traffic load from that service for links
+
+    For example if the environment has 5 nodes, 7 links, and 3 services, the observation space shape will be
+    ``(12, 7)``
+    """
+
+    _FIXED_PARAMETERS: int = 4
+    _MAX_VAL: int = 1_000_000_000
+    _DATA_TYPE: type = np.int64
+
+    def __init__(self, env: "Primaite") -> None:
+        """
+        Initialise a NodeLinkTable observation space component.
+
+        :param env: Training environment.
+        :type env: Primaite
+        """
+        super().__init__(env)
+
+        # Rows: every link and node. Columns: fixed parameters plus services.
+        row_count = self.env.num_links + self.env.num_nodes
+        column_count = self.env.num_services + self._FIXED_PARAMETERS
+        table_shape = (row_count, column_count)
+
+        self.space = spaces.Box(low=0, high=self._MAX_VAL, shape=table_shape, dtype=self._DATA_TYPE)
+
+        # The observation starts out as all zeroes.
+        self.current_observation = np.zeros(table_shape, dtype=self._DATA_TYPE)
+
+        self.structure = self.generate_structure()
+
+    def update(self) -> None:
+        """
+        Refresh the table from the current environment state.
+
+        The structure of the observation space is described in :class:`.NodeLinkTable`
+        """
+        node_map = self.env.nodes
+        link_map = self.env.links
+        table = self.current_observation
+        row = 0
+
+        # Node rows come first.
+        for node in node_map.values():
+            table[row][0] = int(node.node_id)
+            table[row][1] = node.hardware_state.value
+            if isinstance(node, (ActiveNode, ServiceNode)):
+                table[row][2] = node.software_state.value
+                table[row][3] = node.file_system_state_observed.value
+            else:
+                table[row][2] = 0
+                table[row][3] = 0
+
+            # Service columns start after the 4 fixed columns. Nodes that are
+            # not service nodes, or lack the service, get 0.
+            holds_services = isinstance(node, ServiceNode)
+            for column, service in enumerate(self.env.services_list, start=4):
+                if holds_services and node.has_service(service):
+                    table[row][column] = node.get_service_state(service).value
+                else:
+                    table[row][column] = 0
+            row += 1
+
+        # Link rows follow: ID, three zero columns, then one load per protocol.
+        for link in link_map.values():
+            table[row][0] = int(link.get_id())
+            table[row][1] = 0
+            table[row][2] = 0
+            table[row][3] = 0
+            for column, protocol in enumerate(link.get_protocol_list(), start=4):
+                table[row][column] = protocol.get_load()
+            row += 1
+
+    def generate_structure(self) -> List[str]:
+        """Return a list of labels for the components of the flattened observation space."""
+        node_values = self.env.nodes.values()
+        link_values = self.env.links.values()
+
+        labels: List[str] = []
+
+        for node in node_values:
+            node_id = node.node_id
+            labels.append(f"node_{node_id}_id")
+            labels.append(f"node_{node_id}_hardware_status")
+            labels.append(f"node_{node_id}_os_status")
+            labels.append(f"node_{node_id}_fs_status")
+            labels.extend([f"node_{node_id}_service_{serv}_status" for serv in self.env.services_list])
+
+        for link in link_values:
+            link_id = link.id
+            labels.append(f"link_{link_id}_id")
+            # The three remaining fixed columns carry no data for links.
+            labels.append(f"link_{link_id}_n/a")
+            labels.append(f"link_{link_id}_n/a")
+            labels.append(f"link_{link_id}_n/a")
+            labels.extend([f"link_{link_id}_service_{serv}_load" for serv in self.env.services_list])
+
+        return labels
+
+
+class NodeStatuses(AbstractObservationComponent):
+    """
+    Flat list of nodes' hardware, OS, file system, and service states.
+
+    The MultiDiscrete observation space can be thought of as a one-dimensional vector of discrete states,
+    represented by integers.
+    Each node has 3 elements plus 1 per service. It will have the following structure:
+
+    .. code-block::
+
+        [
+            node1 hardware state,
+            node1 OS state,
+            node1 file system state,
+            node1 service1 state,
+            node1 service2 state,
+            node1 serviceN state (one for each service),
+            node2 hardware state,
+            node2 OS state,
+            node2 file system state,
+            node2 service1 state,
+            node2 service2 state,
+            node2 serviceN state (one for each service),
+            ...
+        ]
+    """
+
+    _DATA_TYPE: type = np.int64
+
+    def __init__(self, env: "Primaite") -> None:
+        """
+        Initialise a NodeStatuses observation component.
+
+        :param env: Training environment.
+        :type env: Primaite
+        """
+        super().__init__(env)
+
+        # Each entry allows one value more than its enum has members.
+        per_node_sizes = [
+            len(HardwareState) + 1,
+            len(SoftwareState) + 1,
+            len(FileSystemState) + 1,
+        ]
+        per_service_sizes = [len(SoftwareState) + 1] * self.env.num_services
+        per_node_sizes = per_node_sizes + per_service_sizes
+
+        # The per-node layout is repeated once for every node.
+        shape = per_node_sizes * self.env.num_nodes
+        self.space = spaces.MultiDiscrete(shape)
+
+        # The observation starts out as all zeroes.
+        self.current_observation = np.zeros(len(shape), dtype=self._DATA_TYPE)
+        self.structure = self.generate_structure()
+
+    def update(self) -> None:
+        """
+        Refresh the flat state vector from the current environment state.
+
+        The structure of the observation space is described in :class:`.NodeStatuses`
+        """
+        flat_states = []
+        for node in self.env.nodes.values():
+            hardware_state = node.hardware_state.value
+            service_states = [0] * self.env.num_services
+
+            # OS and file system states are only read from active nodes;
+            # other nodes report 0 for both.
+            is_active = isinstance(node, ActiveNode)
+            software_state = node.software_state.value if is_active else 0
+            file_system_state = node.file_system_state_observed.value if is_active else 0
+
+            if isinstance(node, ServiceNode):
+                for position, service in enumerate(self.env.services_list):
+                    if node.has_service(service):
+                        service_states[position] = node.get_service_state(service).value
+
+            flat_states.extend([hardware_state, software_state, file_system_state, *service_states])
+        self.current_observation[:] = flat_states
+
+    def generate_structure(self) -> List[str]:
+        """Return a list of labels for the components of the flattened observation space."""
+        services = self.env.services_list
+
+        labels = []
+
+        for node in self.env.nodes.values():
+            node_id = node.node_id
+            # Each group starts with a NONE label followed by one per member.
+            labels.append(f"node_{node_id}_hardware_state_NONE")
+            labels.extend([f"node_{node_id}_hardware_state_{state.name}" for state in HardwareState])
+            labels.append(f"node_{node_id}_software_state_NONE")
+            labels.extend([f"node_{node_id}_software_state_{state.name}" for state in SoftwareState])
+            labels.append(f"node_{node_id}_file_system_state_NONE")
+            labels.extend([f"node_{node_id}_file_system_state_{state.name}" for state in FileSystemState])
+            for service in services:
+                labels.append(f"node_{node_id}_service_{service}_state_NONE")
+                labels.extend([f"node_{node_id}_service_{service}_state_{state.name}" for state in SoftwareState])
+        return labels
+
+
+class LinkTrafficLevels(AbstractObservationComponent):
+    """
+    Flat list of traffic levels encoded into banded categories.
+
+    For each link, total traffic or traffic per service is encoded into a categorical value.
+    For example, if ``quantisation_levels=5``, the traffic levels represent these values:
+
+        * 0 = No traffic (0% of bandwidth)
+        * 1 = Low traffic (0%-33% of bandwidth)
+        * 2 = Medium traffic (33%-66% of bandwidth)
+        * 3 = High traffic (66%-100% of bandwidth)
+        * 4 = Link at full capacity (100% of bandwidth)
+
+    .. note::
+        The lowest category always corresponds to no traffic and the highest category to the link being at max capacity.
+        Any amount of traffic between 0% and 100% (exclusive) is divided evenly into the remaining categories.
+
+    """
+
+    _DATA_TYPE: type = np.int64
+
+    def __init__(
+        self,
+        env: "Primaite",
+        combine_service_traffic: bool = False,
+        quantisation_levels: int = 5,
+    ) -> None:
+        """
+        Initialise a LinkTrafficLevels observation component.
+
+        :param env: The environment that forms the basis of the observations
+        :type env: Primaite
+        :param combine_service_traffic: Whether to consider total traffic on the link, or each protocol individually,
+            defaults to False
+        :type combine_service_traffic: bool, optional
+        :param quantisation_levels: How many bands to consider when converting the traffic amount to a categorical
+            value, defaults to 5. Values below 3 are replaced by 5 and a warning is logged.
+        :type quantisation_levels: int, optional
+        """
+        if quantisation_levels < 3:
+            _LOGGER.warning(
+                f"quantisation_levels must be 3 or more because the lowest and highest levels are "
+                f"reserved for 0% and 100% link utilisation, got {quantisation_levels} instead. "
+                f"Resetting to default value (5)"
+            )
+            quantisation_levels = 5
+
+        super().__init__(env)
+
+        self._combine_service_traffic: bool = combine_service_traffic
+        self._quantisation_levels: int = quantisation_levels
+        # One entry per link when traffic is combined, otherwise one per service.
+        self._entries_per_link: int = 1 if self._combine_service_traffic else self.env.num_services
+
+        shape = [self._quantisation_levels] * self.env.num_links * self._entries_per_link
+        self.space = spaces.MultiDiscrete(shape)
+
+        # The observation starts out as all zeroes.
+        self.current_observation = np.zeros(len(shape), dtype=self._DATA_TYPE)
+
+        self.structure = self.generate_structure()
+
+    def _to_traffic_level(self, load, bandwidth) -> int:
+        """Convert a load on a link of the given bandwidth into its integer band."""
+        if load <= 0:
+            return int(0)
+        if load >= bandwidth:
+            return int(self._quantisation_levels - 1)
+        # Loads strictly between empty and full are spread over the bands
+        # that remain once the lowest and highest are set aside.
+        return int((load / bandwidth) // (1 / (self._quantisation_levels - 2)) + 1)
+
+    def update(self) -> None:
+        """
+        Refresh the traffic levels from the current environment state.
+
+        The structure of the observation space is described in :class:`.LinkTrafficLevels`
+        """
+        levels = []
+        for link in self.env.links.values():
+            bandwidth = link.bandwidth
+            if self._combine_service_traffic:
+                loads = [link.get_current_load()]
+            else:
+                loads = [protocol.get_load() for protocol in link.protocol_list]
+
+            levels.extend(self._to_traffic_level(load, bandwidth) for load in loads)
+
+        self.current_observation[:] = levels
+
+    def generate_structure(self) -> List[str]:
+        """Return a list of labels for the components of the flattened observation space."""
+        labels = []
+        for link in self.env.links.values():
+            link_id = link.id
+            if self._combine_service_traffic:
+                protocols = ["overall"]
+            else:
+                protocols = [protocol.name for protocol in link.protocol_list]
+
+            labels.extend(
+                [f"link_{link_id}_{p}_traffic_level_{i}" for p in protocols for i in range(self._quantisation_levels)]
+            )
+        return labels
+
+
+class AccessControlList(AbstractObservationComponent):
+    """Flat list of all the Access Control Rules in the Access Control List.
+
+    The MultiDiscrete observation space can be thought of as a one-dimensional vector of discrete states,
+    represented by integers.
+
+    Each ACL Rule has 6 elements. It will have the following structure:
+
+    .. code-block::
+
+        [
+            acl_rule1 permission,
+            acl_rule1 source_ip,
+            acl_rule1 dest_ip,
+            acl_rule1 protocol,
+            acl_rule1 port,
+            acl_rule1 position,
+            acl_rule2 permission,
+            acl_rule2 source_ip,
+            acl_rule2 dest_ip,
+            acl_rule2 protocol,
+            acl_rule2 port,
+            acl_rule2 position,
+            ...
+        ]
+
+
+    Terms (for ACL Observation Space):
+        [0, 1, 2] - Permission (0 = NA, 1 = DENY, 2 = ALLOW)
+        [0, num nodes] - Source IP (0 = NA, 1 = any, then 2 -> x resolving to Node IDs)
+        [0, num nodes] - Dest IP (0 = NA, 1 = any, then 2 -> x resolving to Node IDs)
+        [0, num services] - Protocol (0 = NA, 1 = any, then 2 -> x resolving to protocol)
+        [0, num ports] - Port (0 = NA, 1 = any, then 2 -> x resolving to port)
+        [0, max acl rules - 1] - Position (0 = NA, 1 = first index, then 2 -> x index resolving to acl rule in acl list)
+
+    NOTE: NA is Non-Applicable - this means the ACL Rule in the list is a NoneType and NOT an ACLRule object.
+    """
+
+    _DATA_TYPE: type = np.int64
+
+    def __init__(self, env: "Primaite"):
+        """
+        Initialise an AccessControlList observation component.
+
+        :param env: The environment that forms the basis of the observations
+        :type env: Primaite
+        """
+        super().__init__(env)
+
+        # Nodes, services and ports each get 2 extra options, for the NA and
+        # ANY values. The position entry has one option per rule slot.
+        permission_options = len(RulePermissionType)
+        source_options = len(env.nodes) + 2
+        dest_options = len(env.nodes) + 2
+        protocol_options = len(env.services_list) + 2
+        port_options = len(env.ports_list) + 2
+        position_options = env.max_number_acl_rules
+        per_rule_sizes = [
+            permission_options,
+            source_options,
+            dest_options,
+            protocol_options,
+            port_options,
+            position_options,
+        ]
+
+        # The per-rule layout is repeated once for every rule slot.
+        shape = per_rule_sizes * self.env.max_number_acl_rules
+        self.space = spaces.MultiDiscrete(shape)
+
+        # The observation starts out as all zeroes.
+        self.current_observation = np.zeros(len(shape), dtype=self._DATA_TYPE)
+
+        self.structure = self.generate_structure()
+
+    def update(self) -> None:
+        """Update the observation based on current environment state.
+
+        The structure of the observation space is described in :class:`.AccessControlList`
+
+        Every slot of ``self.env.acl.acl`` contributes six integers to the observation, in this order:
+
+        * permission: ``1`` for ``RulePermissionType.DENY``, ``2`` for anything else
+        * source IP: ``1`` for ``"ANY"``, otherwise the matching node's ``node_id`` plus one
+        * destination IP: encoded the same way as the source IP
+        * protocol: ``1`` for ``"ANY"``, otherwise the index in ``self.env.services_list`` plus two
+        * port: ``1`` for ``"ANY"``, otherwise the index in ``self.env.ports_list`` plus two
+        * position: the index of the slot within the ACL
+
+        A slot that does not hold an :class:`ACLRule` is written as six zeroes.
+        """
+
+        def _ip_to_int(ip_address):
+            """Return 1 for "ANY", node_id + 1 for the first node owning the address, or None if no node matches."""
+            if ip_address == "ANY":
+                return 1
+            for node in self.env.nodes.values():
+                # Only check ip_address on the node types this lookup has always been restricted to.
+                if isinstance(node, (ServiceNode, ActiveNode)) and node.ip_address == ip_address:
+                    # Stop at the first match so source and destination lookups resolve identically.
+                    return int(node.node_id) + 1
+            return None
+
+        obs = []
+
+        for position, acl_rule in enumerate(self.env.acl.acl):
+            if not isinstance(acl_rule, ACLRule):
+                # The Nothing or NA representation of 'NONE' ACL rules
+                obs.extend([0, 0, 0, 0, 0, 0])
+                continue
+
+            # Map each ACL attribute from what it was to an integer to fit the observation space
+            permission_int = 1 if acl_rule.permission == RulePermissionType.DENY else 2
+            source_ip_int = _ip_to_int(acl_rule.source_ip)
+            dest_ip_int = _ip_to_int(acl_rule.dest_ip)
+
+            protocol = acl_rule.protocol
+            if protocol == "ANY":
+                protocol_int = 1
+            else:
+                # Index of protocols and ports start from 0 so + 2 is needed to add NA and ANY
+                try:
+                    protocol_int = self.env.services_list.index(protocol) + 2
+                except (AttributeError, ValueError):
+                    # list.index signals a missing item with ValueError; AttributeError is kept for
+                    # backward compatibility with the previous handler.
+                    _LOGGER.info(f"Service {protocol} could not be found")
+                    protocol_int = None
+
+            port = acl_rule.port
+            if port == "ANY":
+                port_int = 1
+            elif port in self.env.ports_list:
+                port_int = self.env.ports_list.index(port) + 2
+            else:
+                _LOGGER.info(f"Port {port} could not be found.")
+                port_int = None
+
+            # NOTE(review): values that could not be resolved are still passed on as None, as before.
+            # Whether the array assignment below accepts None depends on self._DATA_TYPE - confirm.
+            obs.extend(
+                [
+                    permission_int,
+                    source_ip_int,
+                    dest_ip_int,
+                    protocol_int,
+                    port_int,
+                    position,
+                ]
+            )
+
+        self.current_observation[:] = obs
+
+    def generate_structure(self) -> List[str]:
+        """
+        Return a list of labels for the components of the flattened observation space.
+
+        For every slot in ``self.env.acl.acl`` the labels are emitted in this order: one per
+        ``RulePermissionType`` member, then ``ANY`` plus one per node key for the source IP, the same for the
+        destination IP, then ``ANY`` plus one per entry of ``self.env.services_list``, and finally ``ANY`` plus
+        one per entry of ``self.env.ports_list``.
+
+        :return: The labels, grouped by ACL slot in slot order.
+        :rtype: List[str]
+        """
+        # NOTE(review): no label is emitted for the rule position value that update() writes - TODO confirm
+        # that the label count matches the flattened space.
+        structure = []
+        # Number the slots by position. Looking the entry up with list.index() returns the first equal entry,
+        # so repeated entries (for example several empty slots) would all receive the same id.
+        for acl_rule_id, _ in enumerate(self.env.acl.acl):
+            prefix = f"acl_rule_{acl_rule_id}"
+
+            for permission in RulePermissionType:
+                structure.append(f"{prefix}_permission_{permission.name}")
+
+            structure.append(f"{prefix}_source_ip_ANY")
+            for node in self.env.nodes.keys():
+                structure.append(f"{prefix}_source_ip_{node}")
+
+            structure.append(f"{prefix}_dest_ip_ANY")
+            for node in self.env.nodes.keys():
+                structure.append(f"{prefix}_dest_ip_{node}")
+
+            structure.append(f"{prefix}_service_ANY")
+            for service in self.env.services_list:
+                structure.append(f"{prefix}_service_{service}")
+
+            structure.append(f"{prefix}_port_ANY")
+            for port in self.env.ports_list:
+                structure.append(f"{prefix}_port_{port}")
+
+        return structure
+
+
+class ObservationsHandler:
+    """
+    Observation space handler assembled from interchangeable components.
+
+    Components are registered with the handler, which combines their individual spaces into one composite space
+    and collects their observations. Each component may accept its own options, which makes the resulting
+    observation space configurable.
+    """
+
+    _REGISTRY: Final[Dict[str, type]] = {
+        "NODE_LINK_TABLE": NodeLinkTable,
+        "NODE_STATUSES": NodeStatuses,
+        "LINK_TRAFFIC_LEVELS": LinkTrafficLevels,
+        "ACCESS_CONTROL_LIST": AccessControlList,
+    }
+
+    def __init__(self) -> None:
+        """Create a handler that has no components registered yet."""
+        self.registered_obs_components: List[AbstractObservationComponent] = []
+
+        # composite (unflattened) space and its flattened counterpart, both assigned by update_space()
+        self._space: spaces.Space
+        self._flat_space: spaces.Space
+
+        # latest observation in composite form and in flattened form, both assigned by update_obs()
+        self._observation: Union[Tuple[np.ndarray], np.ndarray]
+        self._flat_observation: np.ndarray
+
+    def update_obs(self) -> None:
+        """Refresh every registered component and store the combined observation."""
+        snapshots = []
+        for component in self.registered_obs_components:
+            component.update()
+            snapshots.append(component.current_observation)
+
+        # a single component is stored as-is, anything else is stored as a tuple
+        self._observation = snapshots[0] if len(snapshots) == 1 else tuple(snapshots)
+        self._flat_observation = spaces.flatten(self._space, self._observation)
+
+    def register(self, obs_component: AbstractObservationComponent) -> None:
+        """
+        Start tracking a component and rebuild the composite space.
+
+        :param obs_component: The component to add.
+        :type obs_component: AbstractObservationComponent
+        """
+        self.registered_obs_components.append(obs_component)
+        self.update_space()
+
+    def deregister(self, obs_component: AbstractObservationComponent) -> None:
+        """
+        Stop tracking a component and rebuild the composite space.
+
+        :param obs_component: The component to remove. It must be present in this object's
+            ``registered_obs_components`` attribute.
+        :type obs_component: AbstractObservationComponent
+        """
+        self.registered_obs_components.remove(obs_component)
+        self.update_space()
+
+    def update_space(self) -> None:
+        """Recompute the composite and flattened observation spaces from the registered components."""
+        component_spaces = [component.space for component in self.registered_obs_components]
+
+        # exactly one component: use its space directly, otherwise wrap all of them in a tuple space
+        if len(component_spaces) == 1:
+            self._space = component_spaces[0]
+        else:
+            self._space = spaces.Tuple(component_spaces)
+
+        # with no components at all, fall back to an empty box as the flattened space
+        if component_spaces:
+            self._flat_space = spaces.flatten_space(self._space)
+        else:
+            self._flat_space = spaces.Box(0, 1, (0,))
+
+    @property
+    def space(self) -> spaces.Space:
+        """Observation space; the flattened version is returned when more than one component is registered."""
+        if len(self.registered_obs_components) <= 1:
+            return self._space
+        return self._flat_space
+
+    @property
+    def current_observation(self) -> Union[np.ndarray, Tuple[np.ndarray]]:
+        """Current observation; the flattened version is returned when more than one component is registered."""
+        if len(self.registered_obs_components) <= 1:
+            return self._observation
+        return self._flat_observation
+
+    @classmethod
+    def from_config(cls, env: "Primaite", obs_space_config: dict) -> "ObservationsHandler":
+        """
+        Build an observation handler from a config dictionary.
+
+        One component is created and registered for each entry under ``components``. The expected format for
+        the config dictionary is:
+
+        .. code-block:: python
+
+            config = {
+                "components": [
+                    {
+                        "name": "<COMPONENT1_NAME>",
+                    },
+                    {
+                        "name": "<COMPONENT2_NAME>",
+                        "options": {"opt1": val1, "opt2": val2},
+                    },
+                    {
+                        ...
+                    },
+                ]
+            }
+
+        :param env: The environment passed to every component that is created.
+        :type env: Primaite
+        :param obs_space_config: The config dictionary in the format shown above. Each ``name`` must be a key
+            of ``_REGISTRY``; ``options``, when present, are passed to the component as keyword arguments.
+        :type obs_space_config: dict
+        :return: Observation handler
+        :rtype: primaite.environment.observations.ObservationsHandler
+        """
+        handler = cls()
+
+        for component_cfg in obs_space_config["components"]:
+            # Look up the class registered under the configured component name
+            component_class = cls._REGISTRY[component_cfg["name"]]
+
+            # A missing or empty "options" entry means the component is built with its defaults
+            options = component_cfg.get("options") or {}
+            handler.register(component_class(env, **options))
+
+        handler.update_obs()
+        return handler
+
+    def describe_structure(self) -> List[str]:
+        """
+        Create a list of names for the features of the obs space.
+
+        The labels of each component are concatenated in registration order.
+        """
+        # The gym flattening function cannot be applied to labels, so every component supplies its own
+        # hard-coded list of labels in the order expected after flattening.
+        return [label for component in self.registered_obs_components for label in component.structure]
--- a/src/primaite/environment/primaite_env.py
+++ b/src/primaite/environment/primaite_env.py
--- a/src/primaite/environment/reward.py
+++ b/src/primaite/environment/reward.py
@@ -1,19 +1,32 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """Implements reward function."""
-from primaite.common.enums import FILE_SYSTEM_STATE, HARDWARE_STATE, SOFTWARE_STATE
+from logging import Logger
+from typing import Dict, TYPE_CHECKING, Union
+
+from primaite import getLogger
+from primaite.common.custom_typing import NodeUnion
+from primaite.common.enums import FileSystemState, HardwareState, SoftwareState
+from primaite.common.service import Service
 from primaite.nodes.active_node import ActiveNode
 from primaite.nodes.service_node import ServiceNode

+if TYPE_CHECKING:
+    from primaite.config.training_config import TrainingConfig
+    from primaite.pol.ier import IER
+
+_LOGGER: Logger = getLogger(__name__)
+

 def calculate_reward_function(
-    initial_nodes,
-    final_nodes,
-    reference_nodes,
-    green_iers,
-    red_iers,
-    step_count,
-    config_values,
-):
+    initial_nodes: Dict[str, NodeUnion],
+    final_nodes: Dict[str, NodeUnion],
+    reference_nodes: Dict[str, NodeUnion],
+    green_iers: Dict[str, "IER"],
+    green_iers_reference: Dict[str, "IER"],
+    red_iers: Dict[str, "IER"],
+    step_count: int,
+    config_values: "TrainingConfig",
+) -> float:
    """
    Compares the states of the initial and final nodes/links to get a reward.

@@ -26,35 +39,27 @@ def calculate_reward_function(
        step_count: current step
        config_values: Config values
    """
-    reward_value = 0
+    reward_value: float = 0.0

-    # For each node, compare operating state, o/s operating state, service states
+    # For each node, compare hardware state, SoftwareState, service states
    for node_key, final_node in final_nodes.items():
        initial_node = initial_nodes[node_key]
        reference_node = reference_nodes[node_key]

-        # Operating State
-        reward_value += score_node_operating_state(
-            final_node, initial_node, reference_node, config_values
-        )
+        # Hardware State
+        reward_value += score_node_operating_state(final_node, initial_node, reference_node, config_values)

-        # Operating System State
+        # Software State
        if isinstance(final_node, ActiveNode) or isinstance(final_node, ServiceNode):
-            reward_value += score_node_os_state(
-                final_node, initial_node, reference_node, config_values
-            )
+            reward_value += score_node_os_state(final_node, initial_node, reference_node, config_values)

        # Service State
        if isinstance(final_node, ServiceNode):
-            reward_value += score_node_service_state(
-                final_node, initial_node, reference_node, config_values
-            )
+            reward_value += score_node_service_state(final_node, initial_node, reference_node, config_values)

        # File System State
        if isinstance(final_node, ActiveNode):
-            reward_value += score_node_file_system(
-                final_node, initial_node, reference_node, config_values
-            )
+            reward_value += score_node_file_system(final_node, initial_node, reference_node, config_values)

    # Go through each red IER - penalise if it is running
    for ier_key, ier_value in red_iers.items():
@@ -65,22 +70,40 @@ def calculate_reward_function(
                reward_value += config_values.red_ier_running

    # Go through each green IER - penalise if it's not running (weighted)
+    # but only if it's supposed to be running (it's running in reference)
    for ier_key, ier_value in green_iers.items():
+        reference_ier = green_iers_reference[ier_key]
        start_step = ier_value.get_start_step()
        stop_step = ier_value.get_end_step()
        if step_count >= start_step and step_count <= stop_step:
-            if not ier_value.get_is_running():
-                reward_value += (
-                    config_values.green_ier_blocked
-                    * ier_value.get_mission_criticality()
-                )
+            reference_blocked = not reference_ier.get_is_running()
+            live_blocked = not ier_value.get_is_running()
+            ier_reward = config_values.green_ier_blocked * ier_value.get_mission_criticality()

+            if live_blocked and not reference_blocked:
+                reward_value += ier_reward
+            elif live_blocked and reference_blocked:
+                _LOGGER.debug(
+                    (
+                        f"IER {ier_key} is blocked in the reference and live environments. "
+                        f"Penalty of {ier_reward} was NOT applied."
+                    )
+                )
+            elif not live_blocked and reference_blocked:
+                _LOGGER.debug(
+                    (
+                        f"IER {ier_key} is blocked in the reference env but not in the live one. "
+                        f"Penalty of {ier_reward} was NOT applied."
+                    )
+                )
    return reward_value


-def score_node_operating_state(final_node, initial_node, reference_node, config_values):
+def score_node_operating_state(
+    final_node: NodeUnion, initial_node: NodeUnion, reference_node: NodeUnion, config_values: "TrainingConfig"
+) -> float:
    """
-    Calculates score relating to the operating state of a node.
+    Calculates score relating to the hardware state of a node.

    Args:
        final_node: The node after red and blue agents take effect
@@ -88,37 +111,36 @@ def score_node_operating_state(final_node, initial_node, reference_node, config_
        reference_node: The node if there had been no red or blue effect
        config_values: Config values
    """
-    score = 0
-    final_node_operating_state = final_node.get_state()
-    initial_node_operating_state = initial_node.get_state()
-    reference_node_operating_state = reference_node.get_state()
+    score: float = 0.0
+    final_node_operating_state = final_node.hardware_state
+    reference_node_operating_state = reference_node.hardware_state

    if final_node_operating_state == reference_node_operating_state:
        # All is well - we're no different from the reference situation
        score += config_values.all_ok
    else:
        # We're different from the reference situation
-        # Need to compare initial and final state of node (i.e. after red and blue actions)
-        if initial_node_operating_state == HARDWARE_STATE.ON:
-            if final_node_operating_state == HARDWARE_STATE.OFF:
+        # Need to compare reference and final (current) state of node (i.e. at every step)
+        if reference_node_operating_state == HardwareState.ON:
+            if final_node_operating_state == HardwareState.OFF:
                score += config_values.off_should_be_on
-            elif final_node_operating_state == HARDWARE_STATE.RESETTING:
+            elif final_node_operating_state == HardwareState.RESETTING:
                score += config_values.resetting_should_be_on
            else:
                pass
-        elif initial_node_operating_state == HARDWARE_STATE.OFF:
-            if final_node_operating_state == HARDWARE_STATE.ON:
+        elif reference_node_operating_state == HardwareState.OFF:
+            if final_node_operating_state == HardwareState.ON:
                score += config_values.on_should_be_off
-            elif final_node_operating_state == HARDWARE_STATE.RESETTING:
+            elif final_node_operating_state == HardwareState.RESETTING:
                score += config_values.resetting_should_be_off
            else:
                pass
-        elif initial_node_operating_state == HARDWARE_STATE.RESETTING:
-            if final_node_operating_state == HARDWARE_STATE.ON:
+        elif reference_node_operating_state == HardwareState.RESETTING:
+            if final_node_operating_state == HardwareState.ON:
                score += config_values.on_should_be_resetting
-            elif final_node_operating_state == HARDWARE_STATE.OFF:
+            elif final_node_operating_state == HardwareState.OFF:
                score += config_values.off_should_be_resetting
-            elif final_node_operating_state == HARDWARE_STATE.RESETTING:
+            elif final_node_operating_state == HardwareState.RESETTING:
                score += config_values.resetting
            else:
                pass
@@ -128,9 +150,14 @@ def score_node_operating_state(final_node, initial_node, reference_node, config_
    return score


-def score_node_os_state(final_node, initial_node, reference_node, config_values):
+def score_node_os_state(
+    final_node: Union[ActiveNode, ServiceNode],
+    initial_node: Union[ActiveNode, ServiceNode],
+    reference_node: Union[ActiveNode, ServiceNode],
+    config_values: "TrainingConfig",
+) -> float:
    """
-    Calculates score relating to the operating system state of a node.
+    Calculates score relating to the Software State of a node.

    Args:
        final_node: The node after red and blue agents take effect
@@ -138,39 +165,38 @@ def score_node_os_state(final_node, initial_node, reference_node, config_values)
        reference_node: The node if there had been no red or blue effect
        config_values: Config values
    """
-    score = 0
-    final_node_os_state = final_node.get_os_state()
-    initial_node_os_state = initial_node.get_os_state()
-    reference_node_os_state = reference_node.get_os_state()
+    score: float = 0.0
+    final_node_os_state = final_node.software_state
+    reference_node_os_state = reference_node.software_state

    if final_node_os_state == reference_node_os_state:
        # All is well - we're no different from the reference situation
        score += config_values.all_ok
    else:
        # We're different from the reference situation
-        # Need to compare initial and final state of node (i.e. after red and blue actions)
-        if initial_node_os_state == SOFTWARE_STATE.GOOD:
-            if final_node_os_state == SOFTWARE_STATE.PATCHING:
+        # Need to compare reference and final (current) state of node (i.e. at every step)
+        if reference_node_os_state == SoftwareState.GOOD:
+            if final_node_os_state == SoftwareState.PATCHING:
                score += config_values.patching_should_be_good
-            elif final_node_os_state == SOFTWARE_STATE.COMPROMISED:
+            elif final_node_os_state == SoftwareState.COMPROMISED:
                score += config_values.compromised_should_be_good
            else:
                pass
-        elif initial_node_os_state == SOFTWARE_STATE.PATCHING:
-            if final_node_os_state == SOFTWARE_STATE.GOOD:
+        elif reference_node_os_state == SoftwareState.PATCHING:
+            if final_node_os_state == SoftwareState.GOOD:
                score += config_values.good_should_be_patching
-            elif final_node_os_state == SOFTWARE_STATE.COMPROMISED:
+            elif final_node_os_state == SoftwareState.COMPROMISED:
                score += config_values.compromised_should_be_patching
-            elif final_node_os_state == SOFTWARE_STATE.PATCHING:
+            elif final_node_os_state == SoftwareState.PATCHING:
                score += config_values.patching
            else:
                pass
-        elif initial_node_os_state == SOFTWARE_STATE.COMPROMISED:
-            if final_node_os_state == SOFTWARE_STATE.GOOD:
+        elif reference_node_os_state == SoftwareState.COMPROMISED:
+            if final_node_os_state == SoftwareState.GOOD:
                score += config_values.good_should_be_compromised
-            elif final_node_os_state == SOFTWARE_STATE.PATCHING:
+            elif final_node_os_state == SoftwareState.PATCHING:
                score += config_values.patching_should_be_compromised
-            elif final_node_os_state == SOFTWARE_STATE.COMPROMISED:
+            elif final_node_os_state == SoftwareState.COMPROMISED:
                score += config_values.compromised
            else:
                pass
@@ -180,7 +206,9 @@ def score_node_os_state(final_node, initial_node, reference_node, config_values)
    return score


-def score_node_service_state(final_node, initial_node, reference_node, config_values):
+def score_node_service_state(
+    final_node: ServiceNode, initial_node: ServiceNode, reference_node: ServiceNode, config_values: "TrainingConfig"
+) -> float:
    """
    Calculates score relating to the service state(s) of a node.

@@ -190,60 +218,59 @@ def score_node_service_state(final_node, initial_node, reference_node, config_va
        reference_node: The node if there had been no red or blue effect
        config_values: Config values
    """
-    score = 0
-    final_node_services = final_node.get_services()
-    initial_node_services = initial_node.get_services()
-    reference_node_services = reference_node.get_services()
+    score: float = 0.0
+    final_node_services: Dict[str, Service] = final_node.services
+    reference_node_services: Dict[str, Service] = reference_node.services

    for service_key, final_service in final_node_services.items():
        reference_service = reference_node_services[service_key]
-        initial_service = initial_node_services[service_key]
+        final_service = final_node_services[service_key]

-        if final_service.get_state() == reference_service.get_state():
+        if final_service.software_state == reference_service.software_state:
            # All is well - we're no different from the reference situation
            score += config_values.all_ok
        else:
            # We're different from the reference situation
-            # Need to compare initial and final state of node (i.e. after red and blue actions)
-            if initial_service.get_state() == SOFTWARE_STATE.GOOD:
-                if final_service.get_state() == SOFTWARE_STATE.PATCHING:
+            # Need to compare reference and final state of node (i.e. at every step)
+            if reference_service.software_state == SoftwareState.GOOD:
+                if final_service.software_state == SoftwareState.PATCHING:
                    score += config_values.patching_should_be_good
-                elif final_service.get_state() == SOFTWARE_STATE.COMPROMISED:
+                elif final_service.software_state == SoftwareState.COMPROMISED:
                    score += config_values.compromised_should_be_good
-                elif final_service.get_state() == SOFTWARE_STATE.OVERWHELMED:
+                elif final_service.software_state == SoftwareState.OVERWHELMED:
                    score += config_values.overwhelmed_should_be_good
                else:
                    pass
-            elif initial_service.get_state() == SOFTWARE_STATE.PATCHING:
-                if final_service.get_state() == SOFTWARE_STATE.GOOD:
+            elif reference_service.software_state == SoftwareState.PATCHING:
+                if final_service.software_state == SoftwareState.GOOD:
                    score += config_values.good_should_be_patching
-                elif final_service.get_state() == SOFTWARE_STATE.COMPROMISED:
+                elif final_service.software_state == SoftwareState.COMPROMISED:
                    score += config_values.compromised_should_be_patching
-                elif final_service.get_state() == SOFTWARE_STATE.OVERWHELMED:
+                elif final_service.software_state == SoftwareState.OVERWHELMED:
                    score += config_values.overwhelmed_should_be_patching
-                elif final_service.get_state() == SOFTWARE_STATE.PATCHING:
+                elif final_service.software_state == SoftwareState.PATCHING:
                    score += config_values.patching
                else:
                    pass
-            elif initial_service.get_state() == SOFTWARE_STATE.COMPROMISED:
-                if final_service.get_state() == SOFTWARE_STATE.GOOD:
+            elif reference_service.software_state == SoftwareState.COMPROMISED:
+                if final_service.software_state == SoftwareState.GOOD:
                    score += config_values.good_should_be_compromised
-                elif final_service.get_state() == SOFTWARE_STATE.PATCHING:
+                elif final_service.software_state == SoftwareState.PATCHING:
                    score += config_values.patching_should_be_compromised
-                elif final_service.get_state() == SOFTWARE_STATE.COMPROMISED:
+                elif final_service.software_state == SoftwareState.COMPROMISED:
                    score += config_values.compromised
-                elif final_service.get_state() == SOFTWARE_STATE.OVERWHELMED:
+                elif final_service.software_state == SoftwareState.OVERWHELMED:
                    score += config_values.overwhelmed_should_be_compromised
                else:
                    pass
-            elif initial_service.get_state() == SOFTWARE_STATE.OVERWHELMED:
-                if final_service.get_state() == SOFTWARE_STATE.GOOD:
+            elif reference_service.software_state == SoftwareState.OVERWHELMED:
+                if final_service.software_state == SoftwareState.GOOD:
                    score += config_values.good_should_be_overwhelmed
-                elif final_service.get_state() == SOFTWARE_STATE.PATCHING:
+                elif final_service.software_state == SoftwareState.PATCHING:
                    score += config_values.patching_should_be_overwhelmed
-                elif final_service.get_state() == SOFTWARE_STATE.COMPROMISED:
+                elif final_service.software_state == SoftwareState.COMPROMISED:
                    score += config_values.compromised_should_be_overwhelmed
-                elif final_service.get_state() == SOFTWARE_STATE.OVERWHELMED:
+                elif final_service.software_state == SoftwareState.OVERWHELMED:
                    score += config_values.overwhelmed
                else:
                    pass
@@ -253,7 +280,12 @@ def score_node_service_state(final_node, initial_node, reference_node, config_va
    return score


-def score_node_file_system(final_node, initial_node, reference_node, config_values):
+def score_node_file_system(
+    final_node: Union[ActiveNode, ServiceNode],
+    initial_node: Union[ActiveNode, ServiceNode],
+    reference_node: Union[ActiveNode, ServiceNode],
+    config_values: "TrainingConfig",
+) -> float:
    """
    Calculates score relating to the file system state of a node.

@@ -262,13 +294,12 @@ def score_node_file_system(final_node, initial_node, reference_node, config_valu
        initial_node: The node before red and blue agents take effect
        reference_node: The node if there had been no red or blue effect
    """
-    score = 0
-    final_node_file_system_state = final_node.get_file_system_state_actual()
-    initial_node_file_system_state = initial_node.get_file_system_state_actual()
-    reference_node_file_system_state = reference_node.get_file_system_state_actual()
+    score: float = 0.0
+    final_node_file_system_state = final_node.file_system_state_actual
+    reference_node_file_system_state = reference_node.file_system_state_actual

-    final_node_scanning_state = final_node.is_scanning_file_system()
-    reference_node_scanning_state = reference_node.is_scanning_file_system()
+    final_node_scanning_state = final_node.file_system_scanning
+    reference_node_scanning_state = reference_node.file_system_scanning

    # File System State
    if final_node_file_system_state == reference_node_file_system_state:
@@ -276,67 +307,67 @@ def score_node_file_system(final_node, initial_node, reference_node, config_valu
        score += config_values.all_ok
    else:
        # We're different from the reference situation
-        # Need to compare initial and final state of node (i.e. after red and blue actions)
-        if initial_node_file_system_state == FILE_SYSTEM_STATE.GOOD:
-            if final_node_file_system_state == FILE_SYSTEM_STATE.REPAIRING:
+        # Need to compare reference and final state of node (i.e. at every step)
+        if reference_node_file_system_state == FileSystemState.GOOD:
+            if final_node_file_system_state == FileSystemState.REPAIRING:
                score += config_values.repairing_should_be_good
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.RESTORING:
+            elif final_node_file_system_state == FileSystemState.RESTORING:
                score += config_values.restoring_should_be_good
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.CORRUPT:
+            elif final_node_file_system_state == FileSystemState.CORRUPT:
                score += config_values.corrupt_should_be_good
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.DESTROYED:
+            elif final_node_file_system_state == FileSystemState.DESTROYED:
                score += config_values.destroyed_should_be_good
            else:
                pass
-        elif initial_node_file_system_state == FILE_SYSTEM_STATE.REPAIRING:
-            if final_node_file_system_state == FILE_SYSTEM_STATE.GOOD:
+        elif reference_node_file_system_state == FileSystemState.REPAIRING:
+            if final_node_file_system_state == FileSystemState.GOOD:
                score += config_values.good_should_be_repairing
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.RESTORING:
+            elif final_node_file_system_state == FileSystemState.RESTORING:
                score += config_values.restoring_should_be_repairing
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.CORRUPT:
+            elif final_node_file_system_state == FileSystemState.CORRUPT:
                score += config_values.corrupt_should_be_repairing
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.DESTROYED:
+            elif final_node_file_system_state == FileSystemState.DESTROYED:
                score += config_values.destroyed_should_be_repairing
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.REPAIRING:
+            elif final_node_file_system_state == FileSystemState.REPAIRING:
                score += config_values.repairing
            else:
                pass
-        elif initial_node_file_system_state == FILE_SYSTEM_STATE.RESTORING:
-            if final_node_file_system_state == FILE_SYSTEM_STATE.GOOD:
+        elif reference_node_file_system_state == FileSystemState.RESTORING:
+            if final_node_file_system_state == FileSystemState.GOOD:
                score += config_values.good_should_be_restoring
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.REPAIRING:
+            elif final_node_file_system_state == FileSystemState.REPAIRING:
                score += config_values.repairing_should_be_restoring
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.CORRUPT:
+            elif final_node_file_system_state == FileSystemState.CORRUPT:
                score += config_values.corrupt_should_be_restoring
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.DESTROYED:
+            elif final_node_file_system_state == FileSystemState.DESTROYED:
                score += config_values.destroyed_should_be_restoring
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.RESTORING:
+            elif final_node_file_system_state == FileSystemState.RESTORING:
                score += config_values.restoring
            else:
                pass
-        elif initial_node_file_system_state == FILE_SYSTEM_STATE.CORRUPT:
-            if final_node_file_system_state == FILE_SYSTEM_STATE.GOOD:
+        elif reference_node_file_system_state == FileSystemState.CORRUPT:
+            if final_node_file_system_state == FileSystemState.GOOD:
                score += config_values.good_should_be_corrupt
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.REPAIRING:
+            elif final_node_file_system_state == FileSystemState.REPAIRING:
                score += config_values.repairing_should_be_corrupt
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.RESTORING:
+            elif final_node_file_system_state == FileSystemState.RESTORING:
                score += config_values.restoring_should_be_corrupt
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.DESTROYED:
+            elif final_node_file_system_state == FileSystemState.DESTROYED:
                score += config_values.destroyed_should_be_corrupt
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.CORRUPT:
+            elif final_node_file_system_state == FileSystemState.CORRUPT:
                score += config_values.corrupt
            else:
                pass
-        elif initial_node_file_system_state == FILE_SYSTEM_STATE.DESTROYED:
-            if final_node_file_system_state == FILE_SYSTEM_STATE.GOOD:
+        elif reference_node_file_system_state == FileSystemState.DESTROYED:
+            if final_node_file_system_state == FileSystemState.GOOD:
                score += config_values.good_should_be_destroyed
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.REPAIRING:
+            elif final_node_file_system_state == FileSystemState.REPAIRING:
                score += config_values.repairing_should_be_destroyed
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.RESTORING:
+            elif final_node_file_system_state == FileSystemState.RESTORING:
                score += config_values.restoring_should_be_destroyed
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.CORRUPT:
+            elif final_node_file_system_state == FileSystemState.CORRUPT:
                score += config_values.corrupt_should_be_destroyed
-            elif final_node_file_system_state == FILE_SYSTEM_STATE.DESTROYED:
+            elif final_node_file_system_state == FileSystemState.DESTROYED:
                score += config_values.destroyed
            else:
                pass
--- a/src/primaite/exceptions.py
+++ b/src/primaite/exceptions.py
@@ -0,0 +1,11 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+class PrimaiteError(Exception):
+    """The root PrimAITE Error."""
+
+    pass
+
+
+class RLlibAgentError(PrimaiteError):
+    """Raised when there is a generic error with an RLlib agent that is specific to PrimAITE."""
+
+    pass
--- a/src/primaite/links/__init__.py
+++ b/src/primaite/links/__init__.py
@@ -1 +1,2 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Network connections between nodes in the simulation."""
--- a/src/primaite/links/link.py
+++ b/src/primaite/links/link.py
@@ -1,5 +1,6 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """The link class."""
+from typing import List

 from primaite.common.protocol import Protocol

@@ -7,28 +8,27 @@ from primaite.common.protocol import Protocol
 class Link(object):
    """Link class."""

-    def __init__(self, _id, _bandwidth, _source_node_name, _dest_node_name, _services):
+    def __init__(self, _id: str, _bandwidth: int, _source_node_name: str, _dest_node_name: str, _services: str) -> None:
        """
-        Init.
+        Initialise a Link within the simulated network.

-        Args:
-            _id: The IER id
-            _bandwidth: The bandwidth of the link (bps)
-            _source_node_name: The name of the source node
-            _dest_node_name: The name of the destination node
-            _protocols: The protocols to add to the link
+        :param _id: The link id
+        :param _bandwidth: The bandwidth of the link (bps)
+        :param _source_node_name: The name of the source node
+        :param _dest_node_name: The name of the destination node
+        :param _services: The names of the protocols to add to the link
        """
-        self.id = _id
-        self.bandwidth = _bandwidth
-        self.source_node_name = _source_node_name
-        self.dest_node_name = _dest_node_name
-        self.protocol_list = []
+        self.id: str = _id
+        self.bandwidth: int = _bandwidth
+        self.source_node_name: str = _source_node_name
+        self.dest_node_name: str = _dest_node_name
+        self.protocol_list: List[Protocol] = []

        # Add the default protocols
        for protocol_name in _services:
            self.add_protocol(protocol_name)

-    def add_protocol(self, _protocol):
+    def add_protocol(self, _protocol: str) -> None:
        """
        Adds a new protocol to the list of protocols on this link.

@@ -37,7 +37,7 @@ class Link(object):
        """
        self.protocol_list.append(Protocol(_protocol))

-    def get_id(self):
+    def get_id(self) -> str:
        """
        Gets link ID.

@@ -46,7 +46,7 @@ class Link(object):
        """
        return self.id

-    def get_source_node_name(self):
+    def get_source_node_name(self) -> str:
        """
        Gets source node name.

@@ -55,7 +55,7 @@ class Link(object):
        """
        return self.source_node_name

-    def get_dest_node_name(self):
+    def get_dest_node_name(self) -> str:
        """
        Gets destination node name.

@@ -64,7 +64,7 @@ class Link(object):
        """
        return self.dest_node_name

-    def get_bandwidth(self):
+    def get_bandwidth(self) -> int:
        """
        Gets bandwidth of link.

@@ -73,7 +73,7 @@ class Link(object):
        """
        return self.bandwidth

-    def get_protocol_list(self):
+    def get_protocol_list(self) -> List[Protocol]:
        """
        Gets list of protocols on this link.

@@ -82,7 +82,7 @@ class Link(object):
        """
        return self.protocol_list

-    def get_current_load(self):
+    def get_current_load(self) -> int:
        """
        Gets current total load on this link.

@@ -94,7 +94,7 @@ class Link(object):
            total_load += protocol.get_load()
        return total_load

-    def add_protocol_load(self, _protocol, _load):
+    def add_protocol_load(self, _protocol: str, _load: int) -> None:
        """
        Adds a loading to a protocol on this link.

@@ -108,7 +108,7 @@ class Link(object):
            else:
                pass

-    def clear_traffic(self):
+    def clear_traffic(self) -> None:
        """Clears all traffic on this link."""
        for protocol in self.protocol_list:
            protocol.clear_load()
--- a/src/primaite/main.py
+++ b/src/primaite/main.py
@@ -1,379 +1,49 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
-"""
-Primaite - main (harness) module.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""The main PrimAITE session runner module."""
+import argparse
+from pathlib import Path
+from typing import Optional, Union

-Coding Standards: PEP 8
-"""
+from primaite import getLogger
+from primaite.primaite_session import PrimaiteSession

-import logging
-import os.path
-import time
-from datetime import datetime
-
-import yaml
-from stable_baselines3 import A2C, PPO
-from stable_baselines3.common.evaluation import evaluate_policy
-from stable_baselines3.ppo import MlpPolicy as PPOMlp
-
-from primaite.common.config_values_main import config_values_main
-from primaite.environment.primaite_env import Primaite
-from primaite.transactions.transactions_to_file import write_transaction_to_file
-
-# FUNCTIONS #
+_LOGGER = getLogger(__name__)


-def run_generic():
-    """Run against a generic agent."""
-    for episode in range(0, config_values.num_episodes):
-        for step in range(0, config_values.num_steps):
-            # Send the observation space to the agent to get an action
-            # TEMP - random action for now
-            # action = env.blue_agent_action(obs)
-            action = env.action_space.sample()
+def run(
+    training_config_path: Optional[Union[str, Path]] = "",
+    lay_down_config_path: Optional[Union[str, Path]] = "",
+    session_path: Optional[Union[str, Path]] = None,
+) -> None:
+    """
+    Run the PrimAITE Session.

-            # Run the simulation step on the live environment
-            obs, reward, done, info = env.step(action)
+    :param training_config_path: YAML file containing configurable items defined in
+        `primaite.config.training_config.TrainingConfig`
+    :type training_config_path: Optional[Union[str, Path]]
+    :param lay_down_config_path: YAML file containing configurable items for generating network laydown.
+    :type lay_down_config_path: Optional[Union[str, Path]]
+    :param session_path: directory path of the session to load
+    """
+    session = PrimaiteSession(training_config_path, lay_down_config_path, session_path)

-            # Break if done is True
-            if done:
-                break
-
-            # Introduce a delay between steps
-            time.sleep(config_values.time_delay / 1000)
-
-        # Reset the environment at the end of the episode
-        env.reset()
-
-    env.close()
+    session.setup()
+    session.learn()
+    session.evaluate()


-def run_stable_baselines3_ppo():
-    """Run against a stable_baselines3 PPO agent."""
-    if config_values.load_agent == True:
-        try:
-            agent = PPO.load(
-                config_values.agent_load_file,
-                env,
-                verbose=0,
-                n_steps=config_values.num_steps,
-            )
-        except Exception:
-            print(
-                "ERROR: Could not load agent at location: "
-                + config_values.agent_load_file
-            )
-            logging.error("Could not load agent")
-            logging.error("Exception occured", exc_info=True)
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--tc")
+    parser.add_argument("--ldc")
+    parser.add_argument("--load")
+
+    args = parser.parse_args()
+    if args.load:
+        run(session_path=args.load)
    else:
-        agent = PPO(PPOMlp, env, verbose=0, n_steps=config_values.num_steps)
-
-    if config_values.session_type == "TRAINING":
-        # We're in a training session
-        print("Starting training session...")
-        logging.info("Starting training session...")
-        for episode in range(0, config_values.num_episodes):
-            agent.learn(total_timesteps=1)
-        save_agent(agent)
-    else:
-        # Default to being in an evaluation session
-        print("Starting evaluation session...")
-        logging.info("Starting evaluation session...")
-        evaluate_policy(agent, env, n_eval_episodes=config_values.num_episodes)
-
-    env.close()
-
-
-def run_stable_baselines3_a2c():
-    """Run against a stable_baselines3 A2C agent."""
-    if config_values.load_agent == True:
-        try:
-            agent = A2C.load(
-                config_values.agent_load_file,
-                env,
-                verbose=0,
-                n_steps=config_values.num_steps,
-            )
-        except Exception:
-            print(
-                "ERROR: Could not load agent at location: "
-                + config_values.agent_load_file
-            )
-            logging.error("Could not load agent")
-            logging.error("Exception occured", exc_info=True)
-    else:
-        agent = A2C("MlpPolicy", env, verbose=0, n_steps=config_values.num_steps)
-
-    if config_values.session_type == "TRAINING":
-        # We're in a training session
-        print("Starting training session...")
-        logging.info("Starting training session...")
-        for episode in range(0, config_values.num_episodes):
-            agent.learn(total_timesteps=1)
-        save_agent(agent)
-    else:
-        # Default to being in an evaluation session
-        print("Starting evaluation session...")
-        logging.info("Starting evaluation session...")
-        evaluate_policy(agent, env, n_eval_episodes=config_values.num_episodes)
-
-    env.close()
-
-
-def save_agent(_agent):
-    """Persist an agent (only works for stable baselines3 agents at present)."""
-    now = datetime.now()  # current date and time
-    time = now.strftime("%Y%m%d_%H%M%S")
-
-    try:
-        path = "outputs/agents/"
-        is_dir = os.path.isdir(path)
-        if not is_dir:
-            os.makedirs(path)
-        filename = "outputs/agents/agent_saved_" + time
-        _agent.save(filename)
-        logging.info("Trained agent saved as " + filename)
-    except Exception:
-        logging.error("Could not save agent")
-        logging.error("Exception occured", exc_info=True)
-
-
-def configure_logging():
-    """Configures logging."""
-    try:
-        now = datetime.now()  # current date and time
-        time = now.strftime("%Y%m%d_%H%M%S")
-        filename = "logs/app_" + time + ".log"
-        path = "logs/"
-        is_dir = os.path.isdir(path)
-        if not is_dir:
-            os.makedirs(path)
-        logging.basicConfig(
-            filename=filename,
-            filemode="w",
-            format="%(asctime)s - %(levelname)s - %(message)s",
-            datefmt="%d-%b-%y %H:%M:%S",
-            level=logging.INFO,
-        )
-    except Exception:
-        print("ERROR: Could not start logging")
-
-
-def load_config_values():
-    """Loads the config values from the main config file into a config object."""
-    try:
-        # Generic
-        config_values.agent_identifier = config_data["agentIdentifier"]
-        config_values.num_episodes = int(config_data["numEpisodes"])
-        config_values.time_delay = int(config_data["timeDelay"])
-        config_values.config_filename_use_case = config_data["configFilename"]
-        config_values.session_type = config_data["sessionType"]
-        config_values.load_agent = bool(config_data["loadAgent"])
-        config_values.agent_load_file = config_data["agentLoadFile"]
-        # Environment
-        config_values.observation_space_high_value = int(
-            config_data["observationSpaceHighValue"]
-        )
-        # Reward values
-        # Generic
-        config_values.all_ok = int(config_data["allOk"])
-        # Node Operating State
-        config_values.off_should_be_on = int(config_data["offShouldBeOn"])
-        config_values.off_should_be_resetting = int(config_data["offShouldBeResetting"])
-        config_values.on_should_be_off = int(config_data["onShouldBeOff"])
-        config_values.on_should_be_resetting = int(config_data["onShouldBeResetting"])
-        config_values.resetting_should_be_on = int(config_data["resettingShouldBeOn"])
-        config_values.resetting_should_be_off = int(config_data["resettingShouldBeOff"])
-        config_values.resetting = int(config_data["resetting"])
-        # Node O/S or Service State
-        config_values.good_should_be_patching = int(config_data["goodShouldBePatching"])
-        config_values.good_should_be_compromised = int(
-            config_data["goodShouldBeCompromised"]
-        )
-        config_values.good_should_be_overwhelmed = int(
-            config_data["goodShouldBeOverwhelmed"]
-        )
-        config_values.patching_should_be_good = int(config_data["patchingShouldBeGood"])
-        config_values.patching_should_be_compromised = int(
-            config_data["patchingShouldBeCompromised"]
-        )
-        config_values.patching_should_be_overwhelmed = int(
-            config_data["patchingShouldBeOverwhelmed"]
-        )
-        config_values.patching = int(config_data["patching"])
-        config_values.compromised_should_be_good = int(
-            config_data["compromisedShouldBeGood"]
-        )
-        config_values.compromised_should_be_patching = int(
-            config_data["compromisedShouldBePatching"]
-        )
-        config_values.compromised_should_be_overwhelmed = int(
-            config_data["compromisedShouldBeOverwhelmed"]
-        )
-        config_values.compromised = int(config_data["compromised"])
-        config_values.overwhelmed_should_be_good = int(
-            config_data["overwhelmedShouldBeGood"]
-        )
-        config_values.overwhelmed_should_be_patching = int(
-            config_data["overwhelmedShouldBePatching"]
-        )
-        config_values.overwhelmed_should_be_compromised = int(
-            config_data["overwhelmedShouldBeCompromised"]
-        )
-        config_values.overwhelmed = int(config_data["overwhelmed"])
-        # Node File System State
-        config_values.good_should_be_repairing = int(
-            config_data["goodShouldBeRepairing"]
-        )
-        config_values.good_should_be_restoring = int(
-            config_data["goodShouldBeRestoring"]
-        )
-        config_values.good_should_be_corrupt = int(config_data["goodShouldBeCorrupt"])
-        config_values.good_should_be_destroyed = int(
-            config_data["goodShouldBeDestroyed"]
-        )
-        config_values.repairing_should_be_good = int(
-            config_data["repairingShouldBeGood"]
-        )
-        config_values.repairing_should_be_restoring = int(
-            config_data["repairingShouldBeRestoring"]
-        )
-        config_values.repairing_should_be_corrupt = int(
-            config_data["repairingShouldBeCorrupt"]
-        )
-        config_values.repairing_should_be_destroyed = int(
-            config_data["repairingShouldBeDestroyed"]
-        )
-        config_values.repairing = int(config_data["repairing"])
-        config_values.restoring_should_be_good = int(
-            config_data["restoringShouldBeGood"]
-        )
-        config_values.restoring_should_be_repairing = int(
-            config_data["restoringShouldBeRepairing"]
-        )
-        config_values.restoring_should_be_corrupt = int(
-            config_data["restoringShouldBeCorrupt"]
-        )
-        config_values.restoring_should_be_destroyed = int(
-            config_data["restoringShouldBeDestroyed"]
-        )
-        config_values.restoring = int(config_data["restoring"])
-        config_values.corrupt_should_be_good = int(config_data["corruptShouldBeGood"])
-        config_values.corrupt_should_be_repairing = int(
-            config_data["corruptShouldBeRepairing"]
-        )
-        config_values.corrupt_should_be_restoring = int(
-            config_data["corruptShouldBeRestoring"]
-        )
-        config_values.corrupt_should_be_destroyed = int(
-            config_data["corruptShouldBeDestroyed"]
-        )
-        config_values.corrupt = int(config_data["corrupt"])
-        config_values.destroyed_should_be_good = int(
-            config_data["destroyedShouldBeGood"]
-        )
-        config_values.destroyed_should_be_repairing = int(
-            config_data["destroyedShouldBeRepairing"]
-        )
-        config_values.destroyed_should_be_restoring = int(
-            config_data["destroyedShouldBeRestoring"]
-        )
-        config_values.destroyed_should_be_corrupt = int(
-            config_data["destroyedShouldBeCorrupt"]
-        )
-        config_values.destroyed = int(config_data["destroyed"])
-        config_values.scanning = int(config_data["scanning"])
-        # IER status
-        config_values.red_ier_running = int(config_data["redIerRunning"])
-        config_values.green_ier_blocked = int(config_data["greenIerBlocked"])
-        # Patching / Reset durations
-        config_values.os_patching_duration = int(config_data["osPatchingDuration"])
-        config_values.node_reset_duration = int(config_data["nodeResetDuration"])
-        config_values.service_patching_duration = int(
-            config_data["servicePatchingDuration"]
-        )
-        config_values.file_system_repairing_limit = int(
-            config_data["fileSystemRepairingLimit"]
-        )
-        config_values.file_system_restoring_limit = int(
-            config_data["fileSystemRestoringLimit"]
-        )
-        config_values.file_system_scanning_limit = int(
-            config_data["fileSystemScanningLimit"]
-        )
-
-        logging.info("Training agent: " + config_values.agent_identifier)
-        logging.info(
-            "Training environment config: " + config_values.config_filename_use_case
-        )
-        logging.info(
-            "Training cycle has " + str(config_values.num_episodes) + " episodes"
-        )
-
-    except Exception:
-        logging.error("Could not save load config data")
-        logging.error("Exception occured", exc_info=True)
-
-
-# MAIN PROCESS #
-
-# Starting point
-
-# Welcome message
-print("Welcome to the Primary-level AI Training Environment (PrimAITE)")
-
-# Configure logging
-configure_logging()
-
-# Open the main config file
-try:
-    config_file_main = open("config/config_main.yaml", "r")
-    config_data = yaml.safe_load(config_file_main)
-    # Create a config class
-    config_values = config_values_main()
-    # Load in config data
-    load_config_values()
-except Exception:
-    logging.error("Could not load main config")
-    logging.error("Exception occured", exc_info=True)
-
-# Create a list of transactions
-# A transaction is an object holding the:
-# - episode #
-# - step #
-# - initial observation space
-# - action
-# - reward
-# - new observation space
-transaction_list = []
-
-# Create the Primaite environment
-try:
-    env = Primaite(config_values, transaction_list)
-    logging.info("PrimAITE environment created")
-except Exception:
-    logging.error("Could not create PrimAITE environment")
-    logging.error("Exception occured", exc_info=True)
-
-# Get the number of steps (which is stored in the child config file)
-config_values.num_steps = env.episode_steps
-
-# Run environment against an agent
-if config_values.agent_identifier == "GENERIC":
-    run_generic()
-elif config_values.agent_identifier == "STABLE_BASELINES3_PPO":
-    run_stable_baselines3_ppo()
-elif config_values.agent_identifier == "STABLE_BASELINES3_A2C":
-    run_stable_baselines3_a2c()
-
-print("Session finished")
-logging.info("Session finished")
-
-print("Saving transaction logs...")
-logging.info("Saving transaction logs...")
-
-write_transaction_to_file(transaction_list)
-
-config_file_main.close
-
-print("Finished")
-logging.info("Finished")
+        if not args.tc:
+            parser.error("Please provide a training config file using the --tc argument")  # prints usage and exits
+        if not args.ldc:
+            parser.error("Please provide a lay down config file using the --ldc argument")  # prints usage and exits
+        run(training_config_path=args.tc, lay_down_config_path=args.ldc)
--- a/src/primaite/nodes/__init__.py
+++ b/src/primaite/nodes/__init__.py
@@ -1 +1,2 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Nodes represent network hosts in the simulation."""
--- a/src/primaite/nodes/active_node.py
+++ b/src/primaite/nodes/active_node.py
@@ -1,191 +1,178 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """An Active Node (i.e. not an actuator)."""
-from primaite.common.enums import FILE_SYSTEM_STATE, SOFTWARE_STATE
+import logging
+from typing import Final
+
+from primaite.common.enums import FileSystemState, HardwareState, NodeType, Priority, SoftwareState
+from primaite.config.training_config import TrainingConfig
 from primaite.nodes.node import Node

+_LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
+

 class ActiveNode(Node):
    """Active Node class."""

    def __init__(
        self,
-        _id,
-        _name,
-        _type,
-        _priority,
-        _state,
-        _ip_address,
-        _os_state,
-        _file_system_state,
-        _config_values,
-    ):
+        node_id: str,
+        name: str,
+        node_type: NodeType,
+        priority: Priority,
+        hardware_state: HardwareState,
+        ip_address: str,
+        software_state: SoftwareState,
+        file_system_state: FileSystemState,
+        config_values: TrainingConfig,
+    ) -> None:
        """
-        Init.
+        Initialise an active node.

-        Args:
-            _id: The node ID
-            _name: The node name
-            _type: The node type (enum)
-            _priority: The node priority (enum)
-            _state: The node state (enum)
-            _ip_address: The node IP address
-            _os_state: The node Operating System state
-            _file_system_state: The node file system state
-            _config_values: The config values
+        :param node_id: The node ID
+        :param name: The node name
+        :param node_type: The node type (enum)
+        :param priority: The node priority (enum)
+        :param hardware_state: The node Hardware State
+        :param ip_address: The node IP address
+        :param software_state: The node Software State
+        :param file_system_state: The node file system state
+        :param config_values: The config values
        """
-        super().__init__(_id, _name, _type, _priority, _state, _config_values)
-        self.ip_address = _ip_address
-        # Related to O/S
-        self.os_state = _os_state
-        self.patching_count = 0
+        super().__init__(node_id, name, node_type, priority, hardware_state, config_values)
+        self.ip_address: str = ip_address
+        # Related to Software
+        self._software_state: SoftwareState = software_state
+        self.patching_count: int = 0
        # Related to File System
-        self.file_system_state_actual = _file_system_state
-        self.file_system_state_observed = _file_system_state
-        self.file_system_scanning = False
-        self.file_system_scanning_count = 0
-        self.file_system_action_count = 0
+        self.file_system_state_actual: FileSystemState = file_system_state
+        self.file_system_state_observed: FileSystemState = file_system_state
+        self.file_system_scanning: bool = False
+        self.file_system_scanning_count: int = 0
+        self.file_system_action_count: int = 0

-    def set_ip_address(self, _ip_address):
+    @property
+    def software_state(self) -> SoftwareState:
        """
-        Sets IP address.
+        Get the software_state.

-        Args:
-            _ip_address: IP address
+        :return: The software_state.
        """
-        self.ip_address = _ip_address
+        return self._software_state

-    def get_ip_address(self):
+    @software_state.setter
+    def software_state(self, software_state: SoftwareState) -> None:
        """
-        Gets IP address.
+        Set the software_state.

-        Returns:
-             IP address
+        :param software_state: Software State.
        """
-        return self.ip_address
-
-    def set_os_state(self, _os_state):
-        """
-        Sets operating system state.
-
-        Args:
-            _os_state: Operating system state
-        """
-        self.os_state = _os_state
-        if _os_state == SOFTWARE_STATE.PATCHING:
-            self.patching_count = self.config_values.os_patching_duration
-
-    def set_os_state_if_not_compromised(self, _os_state):
-        """
-        Sets operating system state if the node is not compromised.
-
-        Args:
-            _os_state: Operating system state
-        """
-        if self.os_state != SOFTWARE_STATE.COMPROMISED:
-            self.os_state = _os_state
-            if _os_state == SOFTWARE_STATE.PATCHING:
+        if self.hardware_state != HardwareState.OFF:
+            self._software_state = software_state
+            if software_state == SoftwareState.PATCHING:
                self.patching_count = self.config_values.os_patching_duration
+        else:
+            _LOGGER.info(
+                f"The Nodes hardware state is OFF so OS State cannot be "
+                f"changed. "
+                f"Node.node_id:{self.node_id}, "
+                f"Node.hardware_state:{self.hardware_state}, "
+                f"Node.software_state:{self._software_state}"
+            )

-    def get_os_state(self):
+    def set_software_state_if_not_compromised(self, software_state: SoftwareState) -> None:
        """
-        Gets operating system state.
+        Sets Software State if the node is not compromised.

-        Returns:
-             Operating system state
+        Args:
+            software_state: Software State
        """
-        return self.os_state
+        if self.hardware_state != HardwareState.OFF:
+            if self._software_state != SoftwareState.COMPROMISED:
+                self._software_state = software_state
+                if software_state == SoftwareState.PATCHING:
+                    self.patching_count = self.config_values.os_patching_duration
+        else:
+            _LOGGER.info(
+                f"The Nodes hardware state is OFF so OS State cannot be changed."
+                f"Node.node_id:{self.node_id}, "
+                f"Node.hardware_state:{self.hardware_state}, "
+                f"Node.software_state:{self._software_state}"
+            )

-    def update_os_patching_status(self):
+    def update_os_patching_status(self) -> None:
        """Updates operating system status based on patching cycle."""
        self.patching_count -= 1
        if self.patching_count <= 0:
            self.patching_count = 0
-            self.os_state = SOFTWARE_STATE.GOOD
+            self._software_state = SoftwareState.GOOD

-    def set_file_system_state(self, _file_system_state):
+    def set_file_system_state(self, file_system_state: FileSystemState) -> None:
        """
        Sets the file system state (actual and observed).

        Args:
-            _file_system_state: File system state
+            file_system_state: File system state
        """
-        self.file_system_state_actual = _file_system_state
+        if self.hardware_state != HardwareState.OFF:
+            self.file_system_state_actual = file_system_state

-        if _file_system_state == FILE_SYSTEM_STATE.REPAIRING:
-            self.file_system_action_count = (
-                self.config_values.file_system_repairing_limit
+            if file_system_state == FileSystemState.REPAIRING:
+                self.file_system_action_count = self.config_values.file_system_repairing_limit
+                self.file_system_state_observed = FileSystemState.REPAIRING
+            elif file_system_state == FileSystemState.RESTORING:
+                self.file_system_action_count = self.config_values.file_system_restoring_limit
+                self.file_system_state_observed = FileSystemState.RESTORING
+            elif file_system_state == FileSystemState.GOOD:
+                self.file_system_state_observed = FileSystemState.GOOD
+        else:
+            _LOGGER.info(
+                f"The Nodes hardware state is OFF so File System State "
+                f"cannot be changed. "
+                f"Node.node_id:{self.node_id}, "
+                f"Node.hardware_state:{self.hardware_state}, "
+                f"Node.file_system_state.actual:{self.file_system_state_actual}"
            )
-            self.file_system_state_observed = FILE_SYSTEM_STATE.REPAIRING
-        elif _file_system_state == FILE_SYSTEM_STATE.RESTORING:
-            self.file_system_action_count = (
-                self.config_values.file_system_restoring_limit
-            )
-            self.file_system_state_observed = FILE_SYSTEM_STATE.RESTORING
-        elif _file_system_state == FILE_SYSTEM_STATE.GOOD:
-            self.file_system_state_observed = FILE_SYSTEM_STATE.GOOD

-    def set_file_system_state_if_not_compromised(self, _file_system_state):
+    def set_file_system_state_if_not_compromised(self, file_system_state: FileSystemState) -> None:
        """
        Sets the file system state (actual and observed) if not in a compromised state.

        Use for green PoL to prevent it overturning a compromised state

        Args:
-            _file_system_state: File system state
+            file_system_state: File system state
        """
-        if (
-            self.file_system_state_actual != FILE_SYSTEM_STATE.CORRUPT
-            and self.file_system_state_actual != FILE_SYSTEM_STATE.DESTROYED
-        ):
-            self.file_system_state_actual = _file_system_state
+        if self.hardware_state != HardwareState.OFF:
+            if (
+                self.file_system_state_actual != FileSystemState.CORRUPT
+                and self.file_system_state_actual != FileSystemState.DESTROYED
+            ):
+                self.file_system_state_actual = file_system_state

-            if _file_system_state == FILE_SYSTEM_STATE.REPAIRING:
-                self.file_system_action_count = (
-                    self.config_values.file_system_repairing_limit
-                )
-                self.file_system_state_observed = FILE_SYSTEM_STATE.REPAIRING
-            elif _file_system_state == FILE_SYSTEM_STATE.RESTORING:
-                self.file_system_action_count = (
-                    self.config_values.file_system_restoring_limit
-                )
-                self.file_system_state_observed = FILE_SYSTEM_STATE.RESTORING
-            elif _file_system_state == FILE_SYSTEM_STATE.GOOD:
-                self.file_system_state_observed = FILE_SYSTEM_STATE.GOOD
+                if file_system_state == FileSystemState.REPAIRING:
+                    self.file_system_action_count = self.config_values.file_system_repairing_limit
+                    self.file_system_state_observed = FileSystemState.REPAIRING
+                elif file_system_state == FileSystemState.RESTORING:
+                    self.file_system_action_count = self.config_values.file_system_restoring_limit
+                    self.file_system_state_observed = FileSystemState.RESTORING
+                elif file_system_state == FileSystemState.GOOD:
+                    self.file_system_state_observed = FileSystemState.GOOD
+        else:
+            _LOGGER.info(
+                f"The Nodes hardware state is OFF so File System State (if not "
+                f"compromised) cannot be changed. "
+                f"Node.node_id:{self.node_id}, "
+                f"Node.hardware_state:{self.hardware_state}, "
+                f"Node.file_system_state.actual:{self.file_system_state_actual}"
+            )

-    def get_file_system_state_actual(self):
-        """
-        Gets file system state (actual).
-
-        Returns:
-             File system state (actual)
-        """
-        return self.file_system_state_actual
-
-    def get_file_system_state_observed(self):
-        """
-        Gets file system state (observed).
-
-        Returns:
-             File system state (observed)
-        """
-        return self.file_system_state_observed
-
-    def start_file_system_scan(self):
+    def start_file_system_scan(self) -> None:
        """Starts a file system scan."""
        self.file_system_scanning = True
        self.file_system_scanning_count = self.config_values.file_system_scanning_limit

-    def is_scanning_file_system(self):
-        """
-        Gets true/false on whether file system is being scanned.
-
-        Returns:
-             True if file system is being scanned
-        """
-        return self.file_system_scanning
-
-    def update_file_system_state(self):
-        """Updates file system status based on scanning / restore / repair cycle."""
+    def update_file_system_state(self) -> None:
+        """Updates file system status based on scanning/restore/repair cycle."""
        # Deprecate both the action count (for restoring or reparing) and the scanning count
        self.file_system_action_count -= 1
        self.file_system_scanning_count -= 1
@@ -194,14 +181,28 @@ class ActiveNode(Node):
        if self.file_system_action_count <= 0:
            self.file_system_action_count = 0
            if (
-                self.file_system_state_actual == FILE_SYSTEM_STATE.REPAIRING
-                or self.file_system_state_actual == FILE_SYSTEM_STATE.RESTORING
+                self.file_system_state_actual == FileSystemState.REPAIRING
+                or self.file_system_state_actual == FileSystemState.RESTORING
            ):
-                self.file_system_state_actual = FILE_SYSTEM_STATE.GOOD
-                self.file_system_state_observed = FILE_SYSTEM_STATE.GOOD
+                self.file_system_state_actual = FileSystemState.GOOD
+                self.file_system_state_observed = FileSystemState.GOOD

        # Scanning updates
        if self.file_system_scanning == True and self.file_system_scanning_count < 0:
            self.file_system_state_observed = self.file_system_state_actual
            self.file_system_scanning = False
            self.file_system_scanning_count = 0
+
+    def update_resetting_status(self) -> None:
+        """Updates the reset count and sets software and file system state to GOOD when it reaches zero."""
+        super().update_resetting_status()
+        if self.resetting_count <= 0:
+            self.file_system_state_actual = FileSystemState.GOOD
+            self.software_state = SoftwareState.GOOD
+
+    def update_booting_status(self) -> None:
+        """Updates the booting count and sets software and file system state to GOOD when it reaches zero."""
+        super().update_booting_status()
+        if self.booting_count <= 0:
+            self.file_system_state_actual = FileSystemState.GOOD
+            self.software_state = SoftwareState.GOOD
--- a/src/primaite/nodes/node.py
+++ b/src/primaite/nodes/node.py
@@ -1,140 +1,79 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """The base Node class."""
-from primaite.common.enums import HARDWARE_STATE
+from typing import Final
+
+from primaite.common.enums import HardwareState, NodeType, Priority
+from primaite.config.training_config import TrainingConfig


 class Node:
    """Node class."""

-    def __init__(self, _id, _name, _type, _priority, _state, _config_values):
+    def __init__(
+        self,
+        node_id: str,
+        name: str,
+        node_type: NodeType,
+        priority: Priority,
+        hardware_state: HardwareState,
+        config_values: TrainingConfig,
+    ) -> None:
        """
-        Init.
+        Initialise a node.

-        Args:
-            _id: The node id
-            _name: The name of the node
-            _type: The type of the node
-            _priority: The priority of the node
-            _state: The state of the node
+        :param node_id: The node id.
+        :param name: The name of the node.
+        :param node_type: The type of the node.
+        :param priority: The priority of the node.
+        :param hardware_state: The state of the node.
+        :param config_values: Config values.
        """
-        self.id = _id
-        self.name = _name
-        self.type = _type
-        self.priority = _priority
-        self.operating_state = _state
-        self.resetting_count = 0
-        self.config_values = _config_values
+        self.node_id: Final[str] = node_id
+        self.name: Final[str] = name
+        self.node_type: Final[NodeType] = node_type
+        self.priority = priority
+        self.hardware_state: HardwareState = hardware_state
+        self.resetting_count: int = 0
+        self.config_values: TrainingConfig = config_values
+        self.booting_count: int = 0
+        self.shutting_down_count: int = 0

-    def __repr__(self):
+    def __repr__(self) -> str:
        """Returns the name of the node."""
        return self.name

-    def set_id(self, _id):
-        """
-        Sets the node ID.
-
-        Args:
-            _id: The node ID
-        """
-        self.id = _id
-
-    def get_id(self):
-        """
-        Gets the node ID.
-
-        Returns:
-             The node ID
-        """
-        return self.id
-
-    def set_name(self, _name):
-        """
-        Sets the node name.
-
-        Args:
-            _name: The node name
-        """
-        self.name = _name
-
-    def get_name(self):
-        """
-        Gets the node name.
-
-        Returns:
-             The node name
-        """
-        return self.name
-
-    def set_type(self, _type):
-        """
-        Sets the node type.
-
-        Args:
-            _type: The node type
-        """
-        self.type = _type
-
-    def get_type(self):
-        """
-        Gets the node type.
-
-        Returns:
-             The node type
-        """
-        return self.type
-
-    def set_priority(self, _priority):
-        """
-        Sets the node priority.
-
-        Args:
-            _priority: The node priority
-        """
-        self.priority = _priority
-
-    def get_priority(self):
-        """
-        Gets the node priority.
-
-        Returns:
-             The node priority
-        """
-        return self.priority
-
-    def set_state(self, _state):
-        """
-        Sets the node state.
-
-        Args:
-            _state: The node state
-        """
-        self.operating_state = _state
-
-    def get_state(self):
-        """
-        Gets the node operating state.
-
-        Returns:
-             The node operating state
-        """
-        return self.operating_state
-
-    def turn_on(self):
+    def turn_on(self) -> None:
        """Sets the node state to ON."""
-        self.operating_state = HARDWARE_STATE.ON
+        self.hardware_state = HardwareState.BOOTING
+        self.booting_count = self.config_values.node_booting_duration

-    def turn_off(self):
+    def turn_off(self) -> None:
        """Sets the node state to OFF."""
-        self.operating_state = HARDWARE_STATE.OFF
+        self.hardware_state = HardwareState.OFF
+        self.shutting_down_count = self.config_values.node_shutdown_duration

-    def reset(self):
+    def reset(self) -> None:
        """Sets the node state to Resetting and starts the reset count."""
-        self.operating_state = HARDWARE_STATE.RESETTING
+        self.hardware_state = HardwareState.RESETTING
        self.resetting_count = self.config_values.node_reset_duration

-    def update_resetting_status(self):
+    def update_resetting_status(self) -> None:
        """Updates the resetting count."""
        self.resetting_count -= 1
        if self.resetting_count <= 0:
            self.resetting_count = 0
-            self.operating_state = HARDWARE_STATE.ON
+            self.hardware_state = HardwareState.ON
+
+    def update_booting_status(self) -> None:
+        """Updates the booting count."""
+        self.booting_count -= 1
+        if self.booting_count <= 0:
+            self.booting_count = 0
+            self.hardware_state = HardwareState.ON
+
+    def update_shutdown_status(self) -> None:
+        """Updates the shutdown count."""
+        self.shutting_down_count -= 1
+        if self.shutting_down_count <= 0:
+            self.shutting_down_count = 0
+            self.hardware_state = HardwareState.OFF
--- a/src/primaite/nodes/node_state_instruction_green.py
+++ b/src/primaite/nodes/node_state_instruction_green.py
@@ -1,5 +1,9 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """Defines node behaviour for Green PoL."""
+from typing import TYPE_CHECKING, Union
+
+if TYPE_CHECKING:
+    from primaite.common.enums import FileSystemState, HardwareState, NodePOLType, SoftwareState


 class NodeStateInstructionGreen(object):
@@ -7,35 +11,35 @@ class NodeStateInstructionGreen(object):

    def __init__(
        self,
-        _id,
-        _start_step,
-        _end_step,
-        _node_id,
-        _node_pol_type,
-        _service_name,
-        _state,
-    ):
+        _id: str,
+        _start_step: int,
+        _end_step: int,
+        _node_id: str,
+        _node_pol_type: "NodePOLType",
+        _service_name: str,
+        _state: Union["HardwareState", "SoftwareState", "FileSystemState"],
+    ) -> None:
        """
-        Init.
+        Initialise the Node State Instruction.

-        Args:
-            _id: The node state instruction id
-            _start_step: The start step of the instruction
-            _end_step: The end step of the instruction
-            _node_id: The id of the associated node
-            _node_pol_type: The pattern of life type
-            _service_name: The service name
-            _state: The state (node or service)
+        :param _id: The node state instruction id
+        :param _start_step: The start step of the instruction
+        :param _end_step: The end step of the instruction
+        :param _node_id: The id of the associated node
+        :param _node_pol_type: The pattern of life type
+        :param _service_name: The service name
+        :param _state: The state (node or service)
        """
        self.id = _id
        self.start_step = _start_step
        self.end_step = _end_step
        self.node_id = _node_id
-        self.node_pol_type = _node_pol_type
-        self.service_name = _service_name  # Not used when not a service instruction
-        self.state = _state
+        self.node_pol_type: "NodePOLType" = _node_pol_type
+        self.service_name: str = _service_name  # Not used when not a service instruction
+        # TODO: confirm type of state
+        self.state: Union["HardwareState", "SoftwareState", "FileSystemState"] = _state

-    def get_start_step(self):
+    def get_start_step(self) -> int:
        """
        Gets the start step.

@@ -44,7 +48,7 @@ class NodeStateInstructionGreen(object):
        """
        return self.start_step

-    def get_end_step(self):
+    def get_end_step(self) -> int:
        """
        Gets the end step.

@@ -53,7 +57,7 @@ class NodeStateInstructionGreen(object):
        """
        return self.end_step

-    def get_node_id(self):
+    def get_node_id(self) -> str:
        """
        Gets the node ID.

@@ -62,7 +66,7 @@ class NodeStateInstructionGreen(object):
        """
        return self.node_id

-    def get_node_pol_type(self):
+    def get_node_pol_type(self) -> "NodePOLType":
        """
        Gets the node pattern of life type (enum).

@@ -71,7 +75,7 @@ class NodeStateInstructionGreen(object):
        """
        return self.node_pol_type

-    def get_service_name(self):
+    def get_service_name(self) -> str:
        """
        Gets the service name.

@@ -80,7 +84,7 @@ class NodeStateInstructionGreen(object):
        """
        return self.service_name

-    def get_state(self):
+    def get_state(self) -> Union["HardwareState", "SoftwareState", "FileSystemState"]:
        """
        Gets the state (node or service).

--- a/src/primaite/nodes/node_state_instruction_red.py
+++ b/src/primaite/nodes/node_state_instruction_red.py
@@ -1,53 +1,58 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """Defines node behaviour for Green PoL."""
+from typing import TYPE_CHECKING, Union
+
+from primaite.common.enums import NodePOLType
+
+if TYPE_CHECKING:
+    from primaite.common.enums import FileSystemState, HardwareState, NodePOLInitiator, SoftwareState


-class NodeStateInstructionRed(object):
+class NodeStateInstructionRed:
    """The Node State Instruction class."""

    def __init__(
        self,
-        _id,
-        _start_step,
-        _end_step,
-        _target_node_id,
-        _pol_initiator,
-        _pol_type,
-        pol_protocol,
-        _pol_state,
-        _pol_source_node_id,
-        _pol_source_node_service,
-        _pol_source_node_service_state,
-    ):
+        _id: str,
+        _start_step: int,
+        _end_step: int,
+        _target_node_id: str,
+        _pol_initiator: "NodePOLInitiator",
+        _pol_type: NodePOLType,
+        pol_protocol: str,
+        _pol_state: Union["HardwareState", "SoftwareState", "FileSystemState"],
+        _pol_source_node_id: str,
+        _pol_source_node_service: str,
+        _pol_source_node_service_state: str,
+    ) -> None:
        """
-        Init.
+        Initialise the Node State Instruction for the red agent.

-        Args:
-            _id: The node state instruction id
-            _start_step: The start step of the instruction
-            _end_step: The end step of the instruction
-            _target_node_id: The id of the associated node
-            -pol_initiator: The way the PoL is applied (DIRECT, IER or SERVICE)
-            _pol_type: The pattern of life type
-            -pol_protocol: The pattern of life protocol/service affected
-            _pol_state: The state (node or service)
-            _pol_source_node_id: The source node Id (used for initiator type SERVICE)
-            _pol_source_node_service: The source node service (used for initiator type SERVICE)
-            _pol_source_node_service_state: The source node service state (used for initiator type SERVICE)
+        :param _id: The node state instruction id
+        :param _start_step: The start step of the instruction
+        :param _end_step: The end step of the instruction
+        :param _target_node_id: The id of the associated node
+        :param _pol_initiator: The way the PoL is applied (DIRECT, IER or SERVICE)
+        :param _pol_type: The pattern of life type
+        :param pol_protocol: The pattern of life protocol/service affected
+        :param _pol_state: The state (node or service)
+        :param _pol_source_node_id: The source node Id (used for initiator type SERVICE)
+        :param _pol_source_node_service: The source node service (used for initiator type SERVICE)
+        :param _pol_source_node_service_state: The source node service state (used for initiator type SERVICE)
        """
-        self.id = _id
-        self.start_step = _start_step
-        self.end_step = _end_step
-        self.target_node_id = _target_node_id
-        self.initiator = _pol_initiator
-        self.pol_type = _pol_type
-        self.service_name = pol_protocol  # Not used when not a service instruction
-        self.state = _pol_state
-        self.source_node_id = _pol_source_node_id
-        self.source_node_service = _pol_source_node_service
+        self.id: str = _id
+        self.start_step: int = _start_step
+        self.end_step: int = _end_step
+        self.target_node_id: str = _target_node_id
+        self.initiator: "NodePOLInitiator" = _pol_initiator
+        self.pol_type: NodePOLType = _pol_type
+        self.service_name: str = pol_protocol  # Not used when not a service instruction
+        self.state: Union["HardwareState", "SoftwareState", "FileSystemState"] = _pol_state
+        self.source_node_id: str = _pol_source_node_id
+        self.source_node_service: str = _pol_source_node_service
        self.source_node_service_state = _pol_source_node_service_state

-    def get_start_step(self):
+    def get_start_step(self) -> int:
        """
        Gets the start step.

@@ -56,7 +61,7 @@ class NodeStateInstructionRed(object):
        """
        return self.start_step

-    def get_end_step(self):
+    def get_end_step(self) -> int:
        """
        Gets the end step.

@@ -65,7 +70,7 @@ class NodeStateInstructionRed(object):
        """
        return self.end_step

-    def get_target_node_id(self):
+    def get_target_node_id(self) -> str:
        """
        Gets the node ID.

@@ -74,7 +79,7 @@ class NodeStateInstructionRed(object):
        """
        return self.target_node_id

-    def get_initiator(self):
+    def get_initiator(self) -> "NodePOLInitiator":
        """
        Gets the initiator.

@@ -83,7 +88,7 @@ class NodeStateInstructionRed(object):
        """
        return self.initiator

-    def get_pol_type(self):
+    def get_pol_type(self) -> NodePOLType:
        """
        Gets the node pattern of life type (enum).

@@ -92,7 +97,7 @@ class NodeStateInstructionRed(object):
        """
        return self.pol_type

-    def get_service_name(self):
+    def get_service_name(self) -> str:
        """
        Gets the service name.

@@ -101,7 +106,7 @@ class NodeStateInstructionRed(object):
        """
        return self.service_name

-    def get_state(self):
+    def get_state(self) -> Union["HardwareState", "SoftwareState", "FileSystemState"]:
        """
        Gets the state (node or service).

@@ -110,7 +115,7 @@ class NodeStateInstructionRed(object):
        """
        return self.state

-    def get_source_node_id(self):
+    def get_source_node_id(self) -> str:
        """
        Gets the source node id (used for initiator type SERVICE).

@@ -119,7 +124,7 @@ class NodeStateInstructionRed(object):
        """
        return self.source_node_id

-    def get_source_node_service(self):
+    def get_source_node_service(self) -> str:
        """
        Gets the source node service (used for initiator type SERVICE).

@@ -128,7 +133,7 @@ class NodeStateInstructionRed(object):
        """
        return self.source_node_service

-    def get_source_node_service_state(self):
+    def get_source_node_service_state(self) -> str:
        """
        Gets the source node service state (used for initiator type SERVICE).

--- a/src/primaite/nodes/passive_node.py
+++ b/src/primaite/nodes/passive_node.py
@@ -1,32 +1,42 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """The Passive Node class (i.e. an actuator)."""
-
+from primaite.common.enums import HardwareState, NodeType, Priority
+from primaite.config.training_config import TrainingConfig
 from primaite.nodes.node import Node


 class PassiveNode(Node):
    """The Passive Node class."""

-    def __init__(self, _id, _name, _type, _priority, _state, _config_values):
+    def __init__(
+        self,
+        node_id: str,
+        name: str,
+        node_type: NodeType,
+        priority: Priority,
+        hardware_state: HardwareState,
+        config_values: TrainingConfig,
+    ) -> None:
        """
-        Init.
+        Initialise a passive node.

-        Args:
-            _id: The node id
-            _name: The name of the node
-            _type: The type of the node
-            _priority: The priority of the node
-            _state: The state of the node
+        :param node_id: The node id.
+        :param name: The name of the node.
+        :param node_type: The type of the node.
+        :param priority: The priority of the node.
+        :param hardware_state: The state of the node.
+        :param config_values: Config values.
        """
        # Pass through to Super for now
-        super().__init__(_id, _name, _type, _priority, _state, _config_values)
+        super().__init__(node_id, name, node_type, priority, hardware_state, config_values)

-    def get_ip_address(self):
+    @property
+    def ip_address(self) -> str:
        """
-        Gets the node IP address.
+        Gets the node IP address as an empty string.

-        Returns:
-             The node IP address
+        No concept of IP address for passive nodes for now.
+
+        :return: The node IP address.
        """
-        # No concept of IP address for passive nodes for now
        return ""
--- a/src/primaite/nodes/service_node.py
+++ b/src/primaite/nodes/service_node.py
@@ -1,173 +1,190 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """A Service Node (i.e. not an actuator)."""
-from primaite.common.enums import SOFTWARE_STATE
+import logging
+from typing import Dict, Final
+
+from primaite.common.enums import FileSystemState, HardwareState, NodeType, Priority, SoftwareState
+from primaite.common.service import Service
+from primaite.config.training_config import TrainingConfig
 from primaite.nodes.active_node import ActiveNode

+_LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
+

 class ServiceNode(ActiveNode):
    """ServiceNode class."""

    def __init__(
        self,
-        _id,
-        _name,
-        _type,
-        _priority,
-        _state,
-        _ip_address,
-        _os_state,
-        _file_system_state,
-        _config_values,
-    ):
+        node_id: str,
+        name: str,
+        node_type: NodeType,
+        priority: Priority,
+        hardware_state: HardwareState,
+        ip_address: str,
+        software_state: SoftwareState,
+        file_system_state: FileSystemState,
+        config_values: TrainingConfig,
+    ) -> None:
        """
-        Init.
+        Initialise a Service Node.

-        Args:
-            _id: The node id
-            _name: The name of the node
-            _type: The type of the node
-            _priority: The priority of the node
-            _state: The state of the node
-            _ipAddress: The IP address of the node
-            _osState: The operating system state of the node
-            _file_system_state: The file system state of the node
+        :param node_id: The node ID
+        :param name: The node name
+        :param node_type: The node type (enum)
+        :param priority: The node priority (enum)
+        :param hardware_state: The node Hardware State
+        :param ip_address: The node IP address
+        :param software_state: The node Software State
+        :param file_system_state: The node file system state
+        :param config_values: The config values
        """
        super().__init__(
-            _id,
-            _name,
-            _type,
-            _priority,
-            _state,
-            _ip_address,
-            _os_state,
-            _file_system_state,
-            _config_values,
+            node_id,
+            name,
+            node_type,
+            priority,
+            hardware_state,
+            ip_address,
+            software_state,
+            file_system_state,
+            config_values,
        )
-        self.services = {}
+        self.services: Dict[str, Service] = {}

-    def add_service(self, _service):
+    def add_service(self, service: Service) -> None:
        """
        Adds a service to the node.

-        Args:
-            _service: The service to add
+        :param service: The service to add
        """
-        self.services[_service.get_name()] = _service
+        self.services[service.name] = service

-    def get_services(self):
-        """
-        Gets the dictionary of services on this node.
-
-        Returns:
-             Dictionary of services on this node
-        """
-        return self.services
-
-    def has_service(self, _protocol):
+    def has_service(self, protocol_name: str) -> bool:
        """
        Indicates whether a service is on a node.

-        Returns:
-             True if service (protocol) is on the node
+        :param protocol_name: The service (protocol).
+        :return: True if service (protocol) is on the node, otherwise False.
        """
        for service_key, service_value in self.services.items():
-            if service_key == _protocol:
+            if service_key == protocol_name:
                return True
-            else:
-                pass
        return False

-    def service_running(self, _protocol):
+    def service_running(self, protocol_name: str) -> bool:
        """
        Indicates whether a service is in a running state on the node.

-        Returns:
-             True if service (protocol) is in a running state on the node
+        :param protocol_name: The service (protocol)
+        :return: True if service (protocol) is in a running state on the node, otherwise False.
        """
        for service_key, service_value in self.services.items():
-            if service_key == _protocol:
-                if service_value.get_state() != SOFTWARE_STATE.PATCHING:
+            if service_key == protocol_name:
+                if service_value.software_state != SoftwareState.PATCHING:
                    return True
                else:
                    return False
-            else:
-                pass
        return False

-    def service_is_overwhelmed(self, _protocol):
+    def service_is_overwhelmed(self, protocol_name: str) -> bool:
        """
        Indicates whether a service is in an overwhelmed state on the node.

-        Returns:
-             True if service (protocol) is in an overwhelmed state on the node
+        :param protocol_name: The service (protocol)
+        :return: True if service (protocol) is in an overwhelmed state on the node, otherwise False.
        """
        for service_key, service_value in self.services.items():
-            if service_key == _protocol:
-                if service_value.get_state() == SOFTWARE_STATE.OVERWHELMED:
+            if service_key == protocol_name:
+                if service_value.software_state == SoftwareState.OVERWHELMED:
                    return True
                else:
                    return False
-            else:
-                pass
        return False

-    def set_service_state(self, _protocol, _state):
+    def set_service_state(self, protocol_name: str, software_state: SoftwareState) -> None:
        """
-        Sets the state of a service (protocol) on the node.
+        Sets the software_state of a service (protocol) on the node.

-        Args:
-            _protocol: The service (protocol)
-            _state: The state value
+        :param protocol_name: The service (protocol).
+        :param software_state: The software_state.
        """
-        for service_key, service_value in self.services.items():
-            if service_key == _protocol:
+        if self.hardware_state != HardwareState.OFF:
+            service_key = protocol_name
+            service_value = self.services.get(service_key)
+            if service_value:
                # Can't set to compromised if you're in a patching state
                if (
-                    _state == SOFTWARE_STATE.COMPROMISED
-                    and service_value.get_state() != SOFTWARE_STATE.PATCHING
-                ) or _state != SOFTWARE_STATE.COMPROMISED:
-                    service_value.set_state(_state)
-                else:
-                    # Do nothing
-                    pass
-                if _state == SOFTWARE_STATE.PATCHING:
-                    service_value.patching_count = (
-                        self.config_values.service_patching_duration
-                    )
-                else:
-                    # Do nothing
-                    pass
+                    software_state == SoftwareState.COMPROMISED
+                    and service_value.software_state != SoftwareState.PATCHING
+                ) or software_state != SoftwareState.COMPROMISED:
+                    service_value.software_state = software_state
+                if software_state == SoftwareState.PATCHING:
+                    service_value.patching_count = self.config_values.service_patching_duration
+        else:
+            _LOGGER.info(
+                f"The Nodes hardware state is OFF so the state of a service "
+                f"cannot be changed. "
+                f"Node.node_id:{self.node_id}, "
+                f"Node.hardware_state:{self.hardware_state}, "
+                f"Node.services[<key>]:{protocol_name}, "
+                f"Node.services[<key>].software_state:{software_state}"
+            )

-    def set_service_state_if_not_compromised(self, _protocol, _state):
+    def set_service_state_if_not_compromised(self, protocol_name: str, software_state: SoftwareState) -> None:
        """
-        Sets the state of a service (protocol) on the node if the operating state is not "compromised".
+        Sets the software_state of a service (protocol) on the node.

-        Args:
-            _protocol: The service (protocol)
-            _state: The state value
+        Done if the software_state is not "compromised".
+
+        :param protocol_name: The service (protocol).
+        :param software_state: The software_state.
        """
-        for service_key, service_value in self.services.items():
-            if service_key == _protocol:
-                if service_value.get_state() != SOFTWARE_STATE.COMPROMISED:
-                    service_value.set_state(_state)
-                    if _state == SOFTWARE_STATE.PATCHING:
-                        service_value.patching_count = (
-                            self.config_values.service_patching_duration
-                        )
+        if self.hardware_state != HardwareState.OFF:
+            service_key = protocol_name
+            service_value = self.services.get(service_key)
+            if service_value:
+                if service_value.software_state != SoftwareState.COMPROMISED:
+                    service_value.software_state = software_state
+                    if software_state == SoftwareState.PATCHING:
+                        service_value.patching_count = self.config_values.service_patching_duration
+        else:
+            _LOGGER.info(
+                f"The Nodes hardware state is OFF so the state of a service "
+                f"cannot be changed. "
+                f"Node.node_id:{self.node_id}, "
+                f"Node.hardware_state:{self.hardware_state}, "
+                f"Node.services[<key>]:{protocol_name}, "
+                f"Node.services[<key>].software_state:{software_state}"
+            )

-    def get_service_state(self, _protocol):
+    def get_service_state(self, protocol_name: str) -> SoftwareState:
        """
        Gets the state of a service.

-        Returns:
-             The state of the service
+        :return: The software_state of the service.
        """
-        for service_key, service_value in self.services.items():
-            if service_key == _protocol:
-                return service_value.get_state()
+        service_key = protocol_name
+        service_value = self.services.get(service_key)
+        if service_value:
+            return service_value.software_state

-    def update_services_patching_status(self):
+    def update_services_patching_status(self) -> None:
        """Updates the patching counter for any service that are patching."""
        for service_key, service_value in self.services.items():
-            if service_value.get_state() == SOFTWARE_STATE.PATCHING:
+            if service_value.software_state == SoftwareState.PATCHING:
                service_value.reduce_patching_count()
+
+    def update_resetting_status(self) -> None:
+        """Update resetting counter and set software state if it reached 0."""
+        super().update_resetting_status()
+        if self.resetting_count <= 0:
+            for service in self.services.values():
+                service.software_state = SoftwareState.GOOD
+
+    def update_booting_status(self) -> None:
+        """Update booting counter and set software to good if it reached 0."""
+        super().update_booting_status()
+        if self.booting_count <= 0:
+            for service in self.services.values():
+                service.software_state = SoftwareState.GOOD
--- a/src/primaite/notebooks/init.py
+++ b/src/primaite/notebooks/init.py
@@ -0,0 +1,34 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Contains default jupyter notebooks which demonstrate PrimAITE functionality."""
+
+import importlib.util
+import os
+import subprocess
+import sys
+from logging import Logger
+
+from primaite import getLogger, PRIMAITE_PATHS
+
+_LOGGER: Logger = getLogger(__name__)
+
+
+def start_jupyter_session() -> None:
+    """
+    Starts a new Jupyter Lab session in the app notebooks directory.
+
+    The command is an argument list because, without ``shell=True``, POSIX
+    treats a command string as a single program name and fails to launch it.
+    """
+    if importlib.util.find_spec("jupyter") is not None:
+        jupyter_cmd = ["python3", "-m", "jupyter", "lab"]
+        if sys.platform == "win32":
+            jupyter_cmd = ["jupyter", "lab"]
+        working_dir = os.getcwd()
+        os.chdir(PRIMAITE_PATHS.user_notebooks_path)
+        try:
+            subprocess.Popen(jupyter_cmd)
+        finally:
+            os.chdir(working_dir)  # restore even if the launch fails
+    else:
+        # Jupyter is not installed
+        _LOGGER.error("Cannot start jupyter lab as it is not installed")
--- a/src/primaite/pol/init.py
+++ b/src/primaite/pol/init.py
@@ -1 +1,2 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Pattern of Life - represents the actions of users on the network."""
--- a/src/primaite/pol/green_pol.py
+++ b/src/primaite/pol/green_pol.py
@@ -1,16 +1,29 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """Implements Pattern of Life on the network (nodes and links)."""
+from typing import Dict

-from networkx import shortest_path
+from networkx import MultiGraph, shortest_path

-from primaite.common.enums import HARDWARE_STATE, NODE_POL_TYPE, SOFTWARE_STATE, TYPE
+from primaite.acl.access_control_list import AccessControlList
+from primaite.common.custom_typing import NodeUnion
+from primaite.common.enums import HardwareState, NodePOLType, NodeType, SoftwareState
+from primaite.links.link import Link
 from primaite.nodes.active_node import ActiveNode
+from primaite.nodes.node_state_instruction_green import NodeStateInstructionGreen
 from primaite.nodes.service_node import ServiceNode
+from primaite.pol.ier import IER

-_VERBOSE = False
+_VERBOSE: bool = False


-def apply_iers(network, nodes, links, iers, acl, step):
+def apply_iers(
+    network: MultiGraph,
+    nodes: Dict[str, NodeUnion],
+    links: Dict[str, Link],
+    iers: Dict[str, IER],
+    acl: AccessControlList,
+    step: int,
+) -> None:
    """
    Applies IERs to the links (link pattern of life).

@@ -51,30 +64,30 @@ def apply_iers(network, nodes, links, iers, acl, step):
            dest_node = nodes[dest_node_id]

            # 1. Check the source node situation
-            if source_node.get_type() == TYPE.SWITCH:
+            # TODO: should be using isinstance rather than checking node type attribute. IE. just because it's a switch
+            # doesn't mean it has a software state? It could be a PassiveNode or ActiveNode
+            if source_node.node_type == NodeType.SWITCH:
                # It's a switch
                if (
-                    source_node.get_state() == HARDWARE_STATE.ON
-                    and source_node.get_os_state() != SOFTWARE_STATE.PATCHING
+                    source_node.hardware_state == HardwareState.ON
+                    and source_node.software_state != SoftwareState.PATCHING
                ):
                    source_valid = True
                else:
                    # IER no longer valid
                    source_valid = False
-            elif source_node.get_type() == TYPE.ACTUATOR:
+            elif source_node.node_type == NodeType.ACTUATOR:
                # It's an actuator
                # TO DO
                pass
            else:
                # It's not a switch or an actuator (so active node)
                if (
-                    source_node.get_state() == HARDWARE_STATE.ON
-                    and source_node.get_os_state() != SOFTWARE_STATE.PATCHING
+                    source_node.hardware_state == HardwareState.ON
+                    and source_node.software_state != SoftwareState.PATCHING
                ):
                    if source_node.has_service(protocol):
-                        if source_node.service_running(
-                            protocol
-                        ) and not source_node.service_is_overwhelmed(protocol):
+                        if source_node.service_running(protocol) and not source_node.service_is_overwhelmed(protocol):
                            source_valid = True
                        else:
                            source_valid = False
@@ -87,29 +100,21 @@ def apply_iers(network, nodes, links, iers, acl, step):
                    source_valid = False

            # 2. Check the dest node situation
-            if dest_node.get_type() == TYPE.SWITCH:
+            if dest_node.node_type == NodeType.SWITCH:
                # It's a switch
-                if (
-                    dest_node.get_state() == HARDWARE_STATE.ON
-                    and dest_node.get_os_state() != SOFTWARE_STATE.PATCHING
-                ):
+                if dest_node.hardware_state == HardwareState.ON and dest_node.software_state != SoftwareState.PATCHING:
                    dest_valid = True
                else:
                    # IER no longer valid
                    dest_valid = False
-            elif dest_node.get_type() == TYPE.ACTUATOR:
+            elif dest_node.node_type == NodeType.ACTUATOR:
                # It's an actuator
                pass
            else:
                # It's not a switch or an actuator (so active node)
-                if (
-                    dest_node.get_state() == HARDWARE_STATE.ON
-                    and dest_node.get_os_state() != SOFTWARE_STATE.PATCHING
-                ):
+                if dest_node.hardware_state == HardwareState.ON and dest_node.software_state != SoftwareState.PATCHING:
                    if dest_node.has_service(protocol):
-                        if dest_node.service_running(
-                            protocol
-                        ) and not dest_node.service_is_overwhelmed(protocol):
+                        if dest_node.service_running(protocol) and not dest_node.service_is_overwhelmed(protocol):
                            dest_valid = True
                        else:
                            dest_valid = False
@@ -122,16 +127,14 @@ def apply_iers(network, nodes, links, iers, acl, step):
                    dest_valid = False

            # 3. Check that the ACL doesn't block it
-            acl_block = acl.is_blocked(
-                source_node.get_ip_address(), dest_node.get_ip_address(), protocol, port
-            )
+            acl_block = acl.is_blocked(source_node.ip_address, dest_node.ip_address, protocol, port)
            if acl_block:
                if _VERBOSE:
                    print(
                        "ACL block on source: "
-                        + source_node.get_ip_address()
+                        + source_node.ip_address
                        + ", dest: "
-                        + dest_node.get_ip_address()
+                        + dest_node.ip_address
                        + ", protocol: "
                        + protocol
                        + ", port: "
@@ -155,10 +158,7 @@ def apply_iers(network, nodes, links, iers, acl, step):

                # We might have a switch in the path, so check all nodes are operational
                for node in path_node_list:
-                    if (
-                        node.get_state() != HARDWARE_STATE.ON
-                        or node.get_os_state() == SOFTWARE_STATE.PATCHING
-                    ):
+                    if node.hardware_state != HardwareState.ON or node.software_state == SoftwareState.PATCHING:
                        path_valid = False

                if path_valid:
@@ -170,9 +170,7 @@ def apply_iers(network, nodes, links, iers, acl, step):
                    # Check that the link capacity is not exceeded by the new load
                    while count < path_node_list_length - 1:
                        # Get the link between the next two nodes
-                        edge_dict = network.get_edge_data(
-                            path_node_list[count], path_node_list[count + 1]
-                        )
+                        edge_dict = network.get_edge_data(path_node_list[count], path_node_list[count + 1])
                        link_id = edge_dict[0].get("id")
                        link = links[link_id]
                        # Check whether the new load exceeds the bandwidth
@@ -190,7 +188,8 @@ def apply_iers(network, nodes, links, iers, acl, step):
                        while count < path_node_list_length - 1:
                            # Get the link between the next two nodes
                            edge_dict = network.get_edge_data(
-                                path_node_list[count], path_node_list[count + 1]
+                                path_node_list[count],
+                                path_node_list[count + 1],
                            )
                            link_id = edge_dict[0].get("id")
                            link = links[link_id]
@@ -215,7 +214,11 @@ def apply_iers(network, nodes, links, iers, acl, step):
            pass


-def apply_node_pol(nodes, node_pol, step):
+def apply_node_pol(
+    nodes: Dict[str, NodeUnion],
+    node_pol: Dict[str, NodeStateInstructionGreen],
+    step: int,
+) -> None:
    """
    Applies node pattern of life.

@@ -239,15 +242,15 @@ def apply_node_pol(nodes, node_pol, step):
            # continue --------------------------
            node = nodes[node_id]

-            if node_pol_type == NODE_POL_TYPE.OPERATING:
-                # Change operating state
-                node.set_state(state)
-            elif node_pol_type == NODE_POL_TYPE.OS:
+            if node_pol_type == NodePOLType.OPERATING:
+                # Change hardware state
+                node.hardware_state = state
+            elif node_pol_type == NodePOLType.OS:
                # Change OS state
                # Don't allow PoL to fix something that is compromised. Only the Blue agent can do this
                if isinstance(node, ActiveNode) or isinstance(node, ServiceNode):
-                    node.set_os_state_if_not_compromised(state)
-            elif node_pol_type == NODE_POL_TYPE.SERVICE:
+                    node.set_software_state_if_not_compromised(state)
+            elif node_pol_type == NodePOLType.SERVICE:
                # Change a service state
                # Don't allow PoL to fix something that is compromised. Only the Blue agent can do this
                if isinstance(node, ServiceNode):
--- a/src/primaite/pol/ier.py
+++ b/src/primaite/pol/ier.py
@@ -1,4 +1,4 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """
 Information Exchange Requirements for APE.

@@ -11,44 +11,43 @@ class IER(object):

    def __init__(
        self,
-        _id,
-        _start_step,
-        _end_step,
-        _load,
-        _protocol,
-        _port,
-        _source_node_id,
-        _dest_node_id,
-        _mission_criticality,
-        _running=False,
-    ):
+        _id: str,
+        _start_step: int,
+        _end_step: int,
+        _load: int,
+        _protocol: str,
+        _port: str,
+        _source_node_id: str,
+        _dest_node_id: str,
+        _mission_criticality: int,
+        _running: bool = False,
+    ) -> None:
        """
-        Init.
+        Initialise an Information Exchange Requirement.

-        Args:
-            _id: The IER id
-            _start_step: The step when this IER should start
-            _end_step: The step when this IER should end
-            _load: The load this IER should put on a link (bps)
-            _protocol: The protocol of this IER
-            _port: The port this IER runs on
-            _source_node_id: The source node ID
-            _dest_node_id: The destination node ID
-            _mission_criticality: Criticality of this IER to the mission (0 none, 5 mission critical)
-            _running: Indicates whether the IER is currently running
+        :param _id: The IER id
+        :param _start_step: The step when this IER should start
+        :param _end_step: The step when this IER should end
+        :param _load: The load this IER should put on a link (bps)
+        :param _protocol: The protocol of this IER
+        :param _port: The port this IER runs on
+        :param _source_node_id: The source node ID
+        :param _dest_node_id: The destination node ID
+        :param _mission_criticality: Criticality of this IER to the mission (0 none, 5 mission critical)
+        :param _running: Indicates whether the IER is currently running
        """
-        self.id = _id
-        self.start_step = _start_step
-        self.end_step = _end_step
-        self.source_node_id = _source_node_id
-        self.dest_node_id = _dest_node_id
-        self.load = _load
-        self.protocol = _protocol
-        self.port = _port
-        self.mission_criticality = _mission_criticality
-        self.running = _running
+        self.id: str = _id
+        self.start_step: int = _start_step
+        self.end_step: int = _end_step
+        self.source_node_id: str = _source_node_id
+        self.dest_node_id: str = _dest_node_id
+        self.load: int = _load
+        self.protocol: str = _protocol
+        self.port: str = _port
+        self.mission_criticality: int = _mission_criticality
+        self.running: bool = _running

-    def get_id(self):
+    def get_id(self) -> str:
        """
        Gets IER ID.

@@ -57,7 +56,7 @@ class IER(object):
        """
        return self.id

-    def get_start_step(self):
+    def get_start_step(self) -> int:
        """
        Gets IER start step.

@@ -66,7 +65,7 @@ class IER(object):
        """
        return self.start_step

-    def get_end_step(self):
+    def get_end_step(self) -> int:
        """
        Gets IER end step.

@@ -75,7 +74,7 @@ class IER(object):
        """
        return self.end_step

-    def get_load(self):
+    def get_load(self) -> int:
        """
        Gets IER load.

@@ -84,7 +83,7 @@ class IER(object):
        """
        return self.load

-    def get_protocol(self):
+    def get_protocol(self) -> str:
        """
        Gets IER protocol.

@@ -93,7 +92,7 @@ class IER(object):
        """
        return self.protocol

-    def get_port(self):
+    def get_port(self) -> str:
        """
        Gets IER port.

@@ -102,7 +101,7 @@ class IER(object):
        """
        return self.port

-    def get_source_node_id(self):
+    def get_source_node_id(self) -> str:
        """
        Gets IER source node ID.

@@ -111,7 +110,7 @@ class IER(object):
        """
        return self.source_node_id

-    def get_dest_node_id(self):
+    def get_dest_node_id(self) -> str:
        """
        Gets IER destination node ID.

@@ -120,7 +119,7 @@ class IER(object):
        """
        return self.dest_node_id

-    def get_is_running(self):
+    def get_is_running(self) -> bool:
        """
        Informs whether the IER is currently running.

@@ -129,7 +128,7 @@ class IER(object):
        """
        return self.running

-    def set_is_running(self, _value):
+    def set_is_running(self, _value: bool) -> None:
        """
        Sets the running state of the IER.

@@ -138,7 +137,7 @@ class IER(object):
        """
        self.running = _value

-    def get_mission_criticality(self):
+    def get_mission_criticality(self) -> int:
        """
        Gets the IER mission criticality (used in the reward function).

--- a/src/primaite/pol/red_agent_pol.py
+++ b/src/primaite/pol/red_agent_pol.py
@@ -1,22 +1,32 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """Implements POL on the network (nodes and links) resulting from the red agent attack."""
+from typing import Dict

-from networkx import shortest_path
+from networkx import MultiGraph, shortest_path

-from primaite.common.enums import (
-    HARDWARE_STATE,
-    NODE_POL_INITIATOR,
-    NODE_POL_TYPE,
-    SOFTWARE_STATE,
-    TYPE,
-)
+from primaite import getLogger
+from primaite.acl.access_control_list import AccessControlList
+from primaite.common.custom_typing import NodeUnion
+from primaite.common.enums import HardwareState, NodePOLInitiator, NodePOLType, NodeType, SoftwareState
+from primaite.links.link import Link
 from primaite.nodes.active_node import ActiveNode
+from primaite.nodes.node_state_instruction_red import NodeStateInstructionRed
 from primaite.nodes.service_node import ServiceNode
+from primaite.pol.ier import IER

-_VERBOSE = False
+_LOGGER = getLogger(__name__)
+
+_VERBOSE: bool = False


-def apply_red_agent_iers(network, nodes, links, iers, acl, step):
+def apply_red_agent_iers(
+    network: MultiGraph,
+    nodes: Dict[str, NodeUnion],
+    links: Dict[str, Link],
+    iers: Dict[str, IER],
+    acl: AccessControlList,
+    step: int,
+) -> None:
    """
    Applies IERs to the links (link POL) resulting from red agent attack.

@@ -54,26 +64,26 @@ def apply_red_agent_iers(network, nodes, links, iers, acl, step):
            dest_node = nodes[dest_node_id]

            # 1. Check the source node situation
-            if source_node.get_type() == TYPE.SWITCH:
+            if source_node.node_type == NodeType.SWITCH:
                # It's a switch
-                if source_node.get_state() == HARDWARE_STATE.ON:
+                if source_node.hardware_state == HardwareState.ON:
                    source_valid = True
                else:
                    # IER no longer valid
                    source_valid = False
-            elif source_node.get_type() == TYPE.ACTUATOR:
+            elif source_node.node_type == NodeType.ACTUATOR:
                # It's an actuator
                # TO DO
                pass
            else:
                # It's not a switch or an actuator (so active node)
-                if source_node.get_state() == HARDWARE_STATE.ON:
+                # TODO: this occurs after ruling out the possibility that the node is a switch or an actuator, but it
+                # could still be a passive/active node, which may not provide has_service / get_service_state (these
+                # are defined on ServiceNode). The logic here needs to change according to duck typing.
+                if source_node.hardware_state == HardwareState.ON:
                    if source_node.has_service(protocol):
                        # Red agents IERs can only be valid if the source service is in a compromised state
-                        if (
-                            source_node.get_service_state(protocol)
-                            == SOFTWARE_STATE.COMPROMISED
-                        ):
+                        if source_node.get_service_state(protocol) == SoftwareState.COMPROMISED:
                            source_valid = True
                        else:
                            source_valid = False
@@ -86,19 +96,19 @@ def apply_red_agent_iers(network, nodes, links, iers, acl, step):
                    source_valid = False

            # 2. Check the dest node situation
-            if dest_node.get_type() == TYPE.SWITCH:
+            if dest_node.node_type == NodeType.SWITCH:
                # It's a switch
-                if dest_node.get_state() == HARDWARE_STATE.ON:
+                if dest_node.hardware_state == HardwareState.ON:
                    dest_valid = True
                else:
                    # IER no longer valid
                    dest_valid = False
-            elif dest_node.get_type() == TYPE.ACTUATOR:
+            elif dest_node.node_type == NodeType.ACTUATOR:
                # It's an actuator
                pass
            else:
                # It's not a switch or an actuator (so active node)
-                if dest_node.get_state() == HARDWARE_STATE.ON:
+                if dest_node.hardware_state == HardwareState.ON:
                    if dest_node.has_service(protocol):
                        # We don't care what state the destination service is in for an IER
                        dest_valid = True
@@ -111,16 +121,14 @@ def apply_red_agent_iers(network, nodes, links, iers, acl, step):
                    dest_valid = False

            # 3. Check that the ACL doesn't block it
-            acl_block = acl.is_blocked(
-                source_node.get_ip_address(), dest_node.get_ip_address(), protocol, port
-            )
+            acl_block = acl.is_blocked(source_node.ip_address, dest_node.ip_address, protocol, port)
            if acl_block:
                if _VERBOSE:
                    print(
                        "ACL block on source: "
-                        + source_node.get_ip_address()
+                        + source_node.ip_address
                        + ", dest: "
-                        + dest_node.get_ip_address()
+                        + dest_node.ip_address
                        + ", protocol: "
                        + protocol
                        + ", port: "
@@ -145,7 +153,7 @@ def apply_red_agent_iers(network, nodes, links, iers, acl, step):
                # We might have a switch in the path, so check all nodes are operational
                # We're assuming here that red agents can get past switches that are patching
                for node in path_node_list:
-                    if node.get_state() != HARDWARE_STATE.ON:
+                    if node.hardware_state != HardwareState.ON:
                        path_valid = False

                if path_valid:
@@ -157,9 +165,7 @@ def apply_red_agent_iers(network, nodes, links, iers, acl, step):
                    # Check that the link capacity is not exceeded by the new load
                    while count < path_node_list_length - 1:
                        # Get the link between the next two nodes
-                        edge_dict = network.get_edge_data(
-                            path_node_list[count], path_node_list[count + 1]
-                        )
+                        edge_dict = network.get_edge_data(path_node_list[count], path_node_list[count + 1])
                        link_id = edge_dict[0].get("id")
                        link = links[link_id]
                        # Check whether the new load exceeds the bandwidth
@@ -177,7 +183,8 @@ def apply_red_agent_iers(network, nodes, links, iers, acl, step):
                        while count < path_node_list_length - 1:
                            # Get the link between the next two nodes
                            edge_dict = network.get_edge_data(
-                                path_node_list[count], path_node_list[count + 1]
+                                path_node_list[count],
+                                path_node_list[count + 1],
                            )
                            link_id = edge_dict[0].get("id")
                            link = links[link_id]
@@ -207,7 +214,12 @@ def apply_red_agent_iers(network, nodes, links, iers, acl, step):
    pass


-def apply_red_agent_node_pol(nodes, iers, node_pol, step):
+def apply_red_agent_node_pol(
+    nodes: Dict[str, NodeUnion],
+    iers: Dict[str, IER],
+    node_pol: Dict[str, NodeStateInstructionRed],
+    step: int,
+) -> None:
    """
    Applies node pattern of life.

@@ -230,30 +242,33 @@ def apply_red_agent_node_pol(nodes, iers, node_pol, step):
        state = node_instruction.get_state()
        source_node_id = node_instruction.get_source_node_id()
        source_node_service_name = node_instruction.get_source_node_service()
-        source_node_service_state_value = (
-            node_instruction.get_source_node_service_state()
-        )
+        source_node_service_state_value = node_instruction.get_source_node_service_state()

        passed_checks = False

        if step >= start_step and step <= stop_step:
            # continue --------------------------
-            target_node = nodes[target_node_id]
+            target_node: NodeUnion = nodes[target_node_id]
+
+            # check if the initiator type is a str, and if so, cast it as
+            # NodePOLInitiator
+            if isinstance(initiator, str):
+                initiator = NodePOLInitiator[initiator]

            # Based the action taken on the initiator type
-            if initiator == NODE_POL_INITIATOR.DIRECT:
+            if initiator == NodePOLInitiator.DIRECT:
                # No conditions required, just apply the change
                passed_checks = True
-            elif initiator == NODE_POL_INITIATOR.IER:
+            elif initiator == NodePOLInitiator.IER:
                # Need to check there is a red IER incoming
                passed_checks = is_red_ier_incoming(target_node, iers, pol_type)
-            elif initiator == NODE_POL_INITIATOR.SERVICE:
+            elif initiator == NodePOLInitiator.SERVICE:
                # Need to check the condition of a service on another node
                source_node = nodes[source_node_id]
                if source_node.has_service(source_node_service_name):
                    if (
                        source_node.get_service_state(source_node_service_name)
-                        == SOFTWARE_STATE[source_node_service_state_value]
+                        == SoftwareState[source_node_service_state_value]
                    ):
                        passed_checks = True
                    else:
@@ -263,57 +278,57 @@ def apply_red_agent_node_pol(nodes, iers, node_pol, step):
                    # Do nothing, service not on this node
                    pass
            else:
-                if _VERBOSE:
-                    print("Node Red Agent PoL not allowed - misconfiguration")
+                _LOGGER.warning("Node Red Agent PoL not allowed - misconfiguration")

            # Only apply the PoL if the checks have passed (based on the initiator type)
            if passed_checks:
                # Apply the change
-                if pol_type == NODE_POL_TYPE.OPERATING:
-                    # Change operating state
-                    target_node.set_state(state)
-                elif pol_type == NODE_POL_TYPE.OS:
+                if pol_type == NodePOLType.OPERATING:
+                    # Change hardware state
+                    target_node.hardware_state = state
+                elif pol_type == NodePOLType.OS:
                    # Change OS state
-                    if isinstance(target_node, ActiveNode) or isinstance(
-                        target_node, ServiceNode
-                    ):
-                        target_node.set_os_state(state)
-                elif pol_type == NODE_POL_TYPE.SERVICE:
+                    if isinstance(target_node, ActiveNode) or isinstance(target_node, ServiceNode):
+                        target_node.software_state = state
+                elif pol_type == NodePOLType.SERVICE:
                    # Change a service state
                    if isinstance(target_node, ServiceNode):
                        target_node.set_service_state(service_name, state)
                else:
                    # Change the file system status
-                    if isinstance(target_node, ActiveNode) or isinstance(
-                        target_node, ServiceNode
-                    ):
+                    if isinstance(target_node, ActiveNode) or isinstance(target_node, ServiceNode):
                        target_node.set_file_system_state(state)
            else:
-                if _VERBOSE:
-                    print("Node Red Agent PoL not allowed - did not pass checks")
+                _LOGGER.debug("Node Red Agent PoL not allowed - did not pass checks")
        else:
            # PoL is not valid in this time step
            pass


-def is_red_ier_incoming(node, iers, node_pol_type):
-    """
-    Checks if the RED IER is incoming.
+def is_red_ier_incoming(node: NodeUnion, iers: Dict[str, IER], node_pol_type: NodePOLType) -> bool:
+    """Checks if the RED IER is incoming.

-    TODO: Write more descriptive docstring with params and returns.
+    :param node: Destination node of the IER
+    :type node: NodeUnion
+    :param iers: Directory of IERs
+    :type iers: Dict[str,IER]
+    :param node_pol_type: Type of Pattern-Of-Life
+    :type node_pol_type: NodePOLType
+    :return: Whether the RED IER is incoming.
+    :rtype: bool
    """
-    node_id = node.get_id()
+    node_id = node.node_id

    for ier_key, ier_value in iers.items():
        if ier_value.get_is_running() and ier_value.get_dest_node_id() == node_id:
            if (
-                node_pol_type == NODE_POL_TYPE.OPERATING
-                or node_pol_type == NODE_POL_TYPE.OS
-                or node_pol_type == NODE_POL_TYPE.FILE
+                node_pol_type == NodePOLType.OPERATING
+                or node_pol_type == NodePOLType.OS
+                or node_pol_type == NodePOLType.FILE
            ):
-                # It's looking to change operating state, file system or O/S state, so valid
+                # It's looking to change hardware state, file system or SoftwareState, so valid
                return True
-            elif node_pol_type == NODE_POL_TYPE.SERVICE:
+            elif node_pol_type == NodePOLType.SERVICE:
                # Check if the service is present on the node and running
                ier_protocol = ier_value.get_protocol()
                if isinstance(node, ServiceNode):
--- a/src/primaite/primaite_session.py
+++ b/src/primaite/primaite_session.py
@@ -0,0 +1,209 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Main entry point to PrimAITE. Configure training/evaluation experiments and input/output."""
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any, Dict, Final, Optional, Tuple, Union
+
+from primaite import getLogger
+from primaite.agents.agent_abc import AgentSessionABC
+from primaite.agents.hardcoded_acl import HardCodedACLAgent
+from primaite.agents.hardcoded_node import HardCodedNodeAgent
+from primaite.agents.rllib import RLlibAgent
+from primaite.agents.sb3 import SB3Agent
+from primaite.agents.simple import DoNothingACLAgent, DoNothingNodeAgent, DummyAgent, RandomAgent
+from primaite.common.enums import ActionType, AgentFramework, AgentIdentifier, SessionType
+from primaite.config import lay_down_config, training_config
+from primaite.config.training_config import TrainingConfig
+from primaite.utils.session_metadata_parser import parse_session_metadata
+from primaite.utils.session_output_reader import all_transactions_dict, av_rewards_dict
+
+_LOGGER = getLogger(__name__)
+
+
+class PrimaiteSession:
+    """
+    The PrimaiteSession class.
+
+    Provides a single learning and evaluation entry point for all training and lay down configurations.
+
+    Typical use: construct with the config paths (or with the directory of a previous session), call
+    :meth:`setup` to create the agent session, call :meth:`learn` and/or :meth:`evaluate`, then :meth:`close`.
+    """
+
+    def __init__(
+        self,
+        training_config_path: Optional[Union[str, Path]] = "",
+        lay_down_config_path: Optional[Union[str, Path]] = "",
+        session_path: Optional[Union[str, Path]] = None,
+    ) -> None:
+        """
+        The PrimaiteSession constructor.
+
+        :param training_config_path: YAML file containing configurable items defined in
+            `primaite.config.training_config.TrainingConfig`
+        :type training_config_path: Union[path, str]
+        :param lay_down_config_path: YAML file containing configurable items for generating network laydown.
+        :type lay_down_config_path: Union[path, str]
+        :param session_path: directory path of the session to load. When given, both config paths are read
+            from the session metadata and the two config path arguments are ignored.
+        :type session_path: Optional[Union[path, str]]
+        :raises Exception: If ``session_path`` is given but does not exist.
+        """
+        self._agent_session: AgentSessionABC = None  # noqa
+        self.timestamp_str: str = None  # noqa
+        self.learning_path: Path = None  # noqa
+        self.evaluation_path: Path = None  # noqa
+        # Always defined, so the flag can be read whether or not a previous session is being loaded
+        self.is_load_session: bool = False
+
+        # Normalise before storing so that the attribute honours its Path annotation
+        if session_path is not None and not isinstance(session_path, Path):
+            session_path = Path(session_path)
+        self.session_path: Path = session_path  # noqa
+
+        # check if session path is provided
+        if session_path is not None:
+            self.is_load_session = True
+
+            # if a session path is provided, load it
+            if not session_path.exists():
+                raise Exception(f"Session could not be loaded. Path does not exist: {session_path}")
+
+            # Only the config paths are needed here; the metadata dict itself is discarded
+            _, training_config_path, lay_down_config_path = parse_session_metadata(session_path)
+
+        if not isinstance(training_config_path, Path):
+            training_config_path = Path(training_config_path)
+        self._training_config_path: Final[Union[Path, str]] = training_config_path
+        self._training_config: Final[TrainingConfig] = training_config.load(self._training_config_path)
+
+        if not isinstance(lay_down_config_path, Path):
+            lay_down_config_path = Path(lay_down_config_path)
+        self._lay_down_config_path: Final[Union[Path, str]] = lay_down_config_path
+        self._lay_down_config: Dict = lay_down_config.load(self._lay_down_config_path)  # noqa
+
+    def setup(self) -> None:
+        """
+        Performs the session setup.
+
+        Creates the agent session that matches the agent framework, agent identifier and action type of the
+        training config, then copies the session path, timestamp, learning path and evaluation path from it.
+
+        :raises NotImplementedError: For a hardcoded or do-nothing agent with the ``ANY`` action type.
+        :raises ValueError: If the framework / identifier / action type combination is not recognised.
+        """
+        agent_framework = self._training_config.agent_framework
+        agent_identifier = self._training_config.agent_identifier
+        action_type = self._training_config.action_type
+
+        if agent_framework == AgentFramework.CUSTOM:
+            _LOGGER.debug(f"PrimaiteSession Setup: Agent Framework = {AgentFramework.CUSTOM}")
+            if agent_identifier == AgentIdentifier.HARDCODED:
+                _LOGGER.debug(f"PrimaiteSession Setup: Agent Identifier = {AgentIdentifier.HARDCODED}")
+                if action_type == ActionType.NODE:
+                    # Deterministic Hardcoded Agent with Node Action Space
+                    self._agent_session = HardCodedNodeAgent(
+                        self._training_config_path, self._lay_down_config_path, self.session_path
+                    )
+
+                elif action_type == ActionType.ACL:
+                    # Deterministic Hardcoded Agent with ACL Action Space
+                    self._agent_session = HardCodedACLAgent(
+                        self._training_config_path, self._lay_down_config_path, self.session_path
+                    )
+
+                elif action_type == ActionType.ANY:
+                    # Deterministic Hardcoded Agent with ANY Action Space
+                    raise NotImplementedError(f"Hardcoded agent does not support action type {action_type}")
+
+                else:
+                    # Invalid AgentIdentifier ActionType combo
+                    raise ValueError(f"Invalid action type for hardcoded agent: {action_type}")
+
+            elif agent_identifier == AgentIdentifier.DO_NOTHING:
+                _LOGGER.debug(f"PrimaiteSession Setup: Agent Identifier = {AgentIdentifier.DO_NOTHING}")
+                if action_type == ActionType.NODE:
+                    # Do-nothing Agent with Node Action Space
+                    self._agent_session = DoNothingNodeAgent(
+                        self._training_config_path, self._lay_down_config_path, self.session_path
+                    )
+
+                elif action_type == ActionType.ACL:
+                    # Do-nothing Agent with ACL Action Space
+                    self._agent_session = DoNothingACLAgent(
+                        self._training_config_path, self._lay_down_config_path, self.session_path
+                    )
+
+                elif action_type == ActionType.ANY:
+                    # Do-nothing Agent with ANY Action Space
+                    raise NotImplementedError(f"Do-nothing agent does not support action type {action_type}")
+
+                else:
+                    # Invalid AgentIdentifier ActionType combo
+                    raise ValueError(f"Invalid action type for do-nothing agent: {action_type}")
+
+            elif agent_identifier == AgentIdentifier.RANDOM:
+                _LOGGER.debug(f"PrimaiteSession Setup: Agent Identifier = {AgentIdentifier.RANDOM}")
+                self._agent_session = RandomAgent(
+                    self._training_config_path, self._lay_down_config_path, self.session_path
+                )
+            elif agent_identifier == AgentIdentifier.DUMMY:
+                _LOGGER.debug(f"PrimaiteSession Setup: Agent Identifier = {AgentIdentifier.DUMMY}")
+                self._agent_session = DummyAgent(
+                    self._training_config_path, self._lay_down_config_path, self.session_path
+                )
+
+            else:
+                # Invalid AgentFramework AgentIdentifier combo
+                raise ValueError(f"Invalid agent identifier for {agent_framework} framework: {agent_identifier}")
+
+        elif agent_framework == AgentFramework.SB3:
+            _LOGGER.debug(f"PrimaiteSession Setup: Agent Framework = {AgentFramework.SB3}")
+            # Stable Baselines3 Agent
+            self._agent_session = SB3Agent(self._training_config_path, self._lay_down_config_path, self.session_path)
+
+        elif agent_framework == AgentFramework.RLLIB:
+            _LOGGER.debug(f"PrimaiteSession Setup: Agent Framework = {AgentFramework.RLLIB}")
+            # Ray RLlib Agent
+            self._agent_session = RLlibAgent(self._training_config_path, self._lay_down_config_path, self.session_path)
+
+        else:
+            # Invalid AgentFramework
+            raise ValueError(f"Invalid agent framework: {agent_framework}")
+
+        # Mirror the paths chosen by the agent session so the output readers below can locate its files
+        self.session_path: Path = self._agent_session.session_path
+        self.timestamp_str: str = self._agent_session.timestamp_str
+        self.learning_path: Path = self._agent_session.learning_path
+        self.evaluation_path: Path = self._agent_session.evaluation_path
+
+    def learn(
+        self,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Train the agent.
+
+        Does nothing when the training config session type is ``SessionType.EVAL``.
+
+        :param kwargs: Any agent-framework specific key word args.
+        """
+        if self._training_config.session_type != SessionType.EVAL:
+            self._agent_session.learn(**kwargs)
+
+    def evaluate(
+        self,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Evaluate the agent.
+
+        Does nothing when the training config session type is ``SessionType.TRAIN``.
+
+        :param kwargs: Any agent-framework specific key word args.
+        """
+        if self._training_config.session_type != SessionType.TRAIN:
+            self._agent_session.evaluate(**kwargs)
+
+    def close(self) -> None:
+        """Closes the agent."""
+        self._agent_session.close()
+
+    def learn_av_reward_per_episode_dict(self) -> Dict[int, float]:
+        """Get the learn av reward per episode from the csv file in the learning path."""
+        csv_file = f"average_reward_per_episode_{self.timestamp_str}.csv"
+        return av_rewards_dict(self.learning_path / csv_file)
+
+    def eval_av_reward_per_episode_dict(self) -> Dict[int, float]:
+        """Get the eval av reward per episode from the csv file in the evaluation path."""
+        csv_file = f"average_reward_per_episode_{self.timestamp_str}.csv"
+        return av_rewards_dict(self.evaluation_path / csv_file)
+
+    def learn_all_transactions_dict(self) -> Dict[Tuple[int, int], Dict[str, Any]]:
+        """Get the learn all transactions from the csv file in the learning path."""
+        csv_file = f"all_transactions_{self.timestamp_str}.csv"
+        return all_transactions_dict(self.learning_path / csv_file)
+
+    def eval_all_transactions_dict(self) -> Dict[Tuple[int, int], Dict[str, Any]]:
+        """Get the eval all transactions from the csv file in the evaluation path."""
+        csv_file = f"all_transactions_{self.timestamp_str}.csv"
+        return all_transactions_dict(self.evaluation_path / csv_file)
+
+    def metadata_file_as_dict(self) -> Dict[str, Any]:
+        """Read the session_metadata.json file in the session path and return as a dict."""
+        with open(self.session_path / "session_metadata.json", "r") as file:
+            return json.load(file)
--- a/src/primaite/setup/init.py
+++ b/src/primaite/setup/init.py
@@ -0,0 +1,2 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Utilities to prepare the user's data folders."""
--- a/src/primaite/setup/_package_data/primaite_config.yaml
+++ b/src/primaite/setup/_package_data/primaite_config.yaml
@@ -0,0 +1,22 @@
+# The main PrimAITE application config file
+
+# Logging
+logging:
+  # NOTE(review): presumably one of the level names used as keys under logger_format - confirm
+  log_level: INFO
+  # One format string per level name, written with %(...)s placeholders
+  # (asctime, levelname, name, lineno, message). WARNING and above use the longer format.
+  logger_format:
+    DEBUG: '%(asctime)s: %(message)s'
+    INFO: '%(asctime)s: %(message)s'
+    WARNING: '%(asctime)s::%(levelname)s::%(name)s::%(lineno)s::%(message)s'
+    ERROR: '%(asctime)s::%(levelname)s::%(name)s::%(lineno)s::%(message)s'
+    CRITICAL: '%(asctime)s::%(levelname)s::%(name)s::%(lineno)s::%(message)s'
+
+# Session
+session:
+  outputs:
+    plots:
+      size:
+        # NOTE(review): presumably width/height only apply when auto_size is false - confirm in the plotting code
+        auto_size: false
+        width: 1500
+        height: 900
+      # NOTE(review): looks like a Plotly template name - confirm
+      template: plotly_white
+      range_slider: false
--- a/src/primaite/setup/old_installation_clean_up.py
+++ b/src/primaite/setup/old_installation_clean_up.py
@@ -0,0 +1,14 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+
+from primaite import getLogger
+
+_LOGGER = getLogger(__name__)
+
+
+def run() -> None:
+    """
+    Perform the full clean-up.
+
+    Currently a placeholder: no clean-up steps are carried out.
+    """
+    return None
+
+
+if __name__ == "__main__":
+    # Allow the module to be executed directly as a script
+    run()
--- a/src/primaite/setup/reset_demo_notebooks.py
+++ b/src/primaite/setup/reset_demo_notebooks.py
@@ -0,0 +1,35 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+import filecmp
+import os
+import shutil
+from logging import Logger
+from pathlib import Path
+
+import pkg_resources
+
+from primaite import getLogger, PRIMAITE_PATHS
+
+_LOGGER: Logger = getLogger(__name__)
+
+
+def run(overwrite_existing: bool = True) -> None:
+    """
+    Copies the packaged demo jupyter notebooks into the users app notebooks directory.
+
+    A file missing from the target directory is always copied. A file that already exists is only replaced
+    when ``overwrite_existing`` is set, ``filecmp.cmp`` reports a difference and the target path does not
+    contain ``.ipynb_checkpoints``.
+
+    :param overwrite_existing: A bool to toggle replacing existing edited notebooks on or off.
+    """
+    package_root = pkg_resources.resource_filename("primaite", "notebooks/_package_data")
+    for current_dir, _sub_dirs, file_names in os.walk(package_root):
+        for file_name in file_names:
+            source_fp = os.path.join(current_dir, file_name)
+            # Rebuild the path relative to the package data root underneath the user directory
+            relative_parts = os.path.relpath(source_fp, package_root).split(os.sep)
+            target_fp = PRIMAITE_PATHS.user_notebooks_path / Path(*relative_parts)
+            target_fp.parent.mkdir(exist_ok=True, parents=True)
+
+            if not target_fp.is_file():
+                needs_copy = True
+            elif overwrite_existing:
+                differs = not filecmp.cmp(source_fp, target_fp)
+                needs_copy = differs and (".ipynb_checkpoints" not in str(target_fp))
+            else:
+                needs_copy = False
+
+            if not needs_copy:
+                continue
+
+            shutil.copy2(source_fp, target_fp)
+            _LOGGER.info(f"Reset example notebook: {target_fp}")
--- a/src/primaite/setup/reset_example_configs.py
+++ b/src/primaite/setup/reset_example_configs.py
@@ -0,0 +1,35 @@
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+import filecmp
+import os
+import shutil
+from pathlib import Path
+
+import pkg_resources
+
+from primaite import getLogger, PRIMAITE_PATHS
+
+_LOGGER = getLogger(__name__)
+
+
+def run(overwrite_existing: bool = True) -> None:
+    """
+    Copies the packaged example config files into the users app config directory.
+
+    A file missing from the ``example_config`` target directory is always copied. A file that already exists
+    is only replaced when ``overwrite_existing`` is set and ``filecmp.cmp`` reports a difference.
+
+    :param overwrite_existing: A bool to toggle replacing existing edited config on or off.
+    """
+    package_root = pkg_resources.resource_filename("primaite", "config/_package_data")
+    example_config_root = PRIMAITE_PATHS.user_config_path / "example_config"
+
+    for current_dir, _sub_dirs, file_names in os.walk(package_root):
+        for file_name in file_names:
+            source_fp = os.path.join(current_dir, file_name)
+            # Rebuild the path relative to the package data root underneath the user directory
+            relative_parts = os.path.relpath(source_fp, package_root).split(os.sep)
+            target_fp = example_config_root / Path(*relative_parts)
+            target_fp.parent.mkdir(exist_ok=True, parents=True)
+
+            if not target_fp.is_file():
+                needs_copy = True
+            elif overwrite_existing:
+                needs_copy = not filecmp.cmp(source_fp, target_fp)
+            else:
+                needs_copy = False
+
+            if not needs_copy:
+                continue
+
+            shutil.copy2(source_fp, target_fp)
+            _LOGGER.info(f"Reset example config: {target_fp}")
--- a/src/primaite/transactions/init.py
+++ b/src/primaite/transactions/init.py
@@ -1 +1,2 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+"""Record data of the system's state and agent's observations and actions."""
--- a/src/primaite/transactions/transaction.py
+++ b/src/primaite/transactions/transaction.py
@@ -1,57 +1,102 @@
-# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
+# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 """The Transaction class."""
+from datetime import datetime
+from typing import List, Optional, Tuple, TYPE_CHECKING, Union
+
+from primaite.common.enums import AgentIdentifier
+
+if TYPE_CHECKING:
+    import numpy as np
+    from gym import spaces


 class Transaction(object):
    """Transaction class."""

-    def __init__(self, _timestamp, _agent_identifier, _episode_number, _step_number):
+    def __init__(self, agent_identifier: AgentIdentifier, episode_number: int, step_number: int) -> None:
        """
-        Init.
+        Transaction constructor.

-        Args:
-            _timestamp: The time this object was created
-            _agent_identifier: An identifier for the agent in use
-            _episode_number: The episode number
-            _step_number: The step number
+        The timestamp is taken at construction time; the remaining attributes start as ``None`` and are
+        expected to be assigned before :meth:`as_csv_data` is called.
+
+        :param agent_identifier: An identifier for the agent in use
+        :param episode_number: The episode number
+        :param step_number: The step number
        """
-        self.timestamp = _timestamp
-        self.agent_identifier = _agent_identifier
-        self.episode_number = _episode_number
-        self.step_number = _step_number
+        self.timestamp: datetime = datetime.now()
+        "The datetime of the transaction"
+        self.agent_identifier: AgentIdentifier = agent_identifier
+        "The agent identifier"
+        self.episode_number: int = episode_number
+        "The episode number"
+        self.step_number: int = step_number
+        "The step number"
+        # NOTE(review): annotated as a gym Space, but as_csv_data calls .tolist() on it - confirm the real type
+        self.obs_space: "spaces.Space" = None
+        "The observation written to the csv row by as_csv_data"
+        self.obs_space_pre: Optional[Union["np.ndarray", Tuple["np.ndarray"]]] = None
+        "The observation space before any actions are taken"
+        self.obs_space_post: Optional[Union["np.ndarray", Tuple["np.ndarray"]]] = None
+        "The observation space after any actions are taken"
+        self.reward: Optional[float] = None
+        "The reward value"
+        self.action_space: Optional[int] = None
+        "The action space invoked by the agent"
+        self.obs_space_description: Optional[List[str]] = None
+        "The env observation space description"

-    def set_obs_space_pre(self, _obs_space_pre):
+    def as_csv_data(self) -> Tuple[List, List]:
        """
-        Sets the observation space (pre).
+        Converts the Transaction to a csv data row and provides a header.

-        Args:
-            _obs_space_pre: The observation space before any actions are taken
+        The header holds one ``AS_<n>`` column per action cell in the row, so header and row stay aligned.
+
+        :return: A tuple consisting of (header, data).
        """
-        self.obs_space_pre = _obs_space_pre
+        # Build the action cells first and size the header from them: an int action is a single cell,
+        # its value is not a count of cells.
+        action_cells = _turn_action_space_to_array(self.action_space)

-    def set_obs_space_post(self, _obs_space_post):
-        """
-        Sets the observation space (post).
+        # Create the action space headers array
+        action_header = ["AS_" + str(x) for x in range(len(action_cells))]

-        Args:
-            _obs_space_post: The observation space after any actions are taken
-        """
-        self.obs_space_post = _obs_space_post
+        # Assemble the header: fixed columns, then action columns, then observation columns
+        header = ["Timestamp", "Episode", "Step", "Reward"]
+        header = header + action_header + self.obs_space_description

-    def set_reward(self, _reward):
-        """
-        Sets the reward.
+        row = [
+            str(self.timestamp),
+            str(self.episode_number),
+            str(self.step_number),
+            str(self.reward),
+        ]
+        row = row + action_cells + self.obs_space.tolist()
+        return header, row

-        Args:
-            _reward: The reward value
-        """
-        self.reward = _reward

-    def set_action_space(self, _action_space):
-        """
-        Sets the action space.
+def _turn_action_space_to_array(action_space: Union[int, List[int]]) -> List[str]:
+    """
+    Converts an action into a list of strings so it can be saved to csv.

-        Args:
-            _action_space: The action space invoked by the agent
-        """
-        self.action_space = _action_space
+    :param action_space: The action space, either a single value or a list of values
+    :return: One string per element for a list, otherwise a single-element list holding ``str(action_space)``
+    """
+    # Anything that is not a list is treated as one value and becomes one cell
+    if not isinstance(action_space, list):
+        return [str(action_space)]
+    return list(map(str, action_space))
+
+
+def _turn_obs_space_to_array(obs_space: "np.ndarray", obs_assets: int, obs_features: int) -> List[str]:
+    """
+    Flattens the observation space into a list of strings so it can be saved to csv.
+
+    Values are read as ``obs_space[asset][feature]``, asset by asset, with the features of each asset in order.
+
+    :param obs_space: The observation space
+    :param obs_assets: The number of assets (i.e. nodes or links) in the observation space
+    :param obs_features: The number of features associated with the asset
+    :return: The observation space as an array of strings
+    """
+    return [str(obs_space[asset][feature]) for asset in range(obs_assets) for feature in range(obs_features)]
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .2.1
 .0.0