Merged PR 633: #3110 Final user guide comments.

## Summary Feedback following James' comments ## Test process ## Checklist - [x ] PR is linked to a **work item** - [x] **acceptance criteria** of linked ticket are met - [x] performed **self-review** of the code - [x] written **tests** for any new functionality added with this PR - [x] updated the **documentation** if this PR changes or adds functionality - [x] written/updated **design docs** if this PR implements new functionality - [x] updated the **change log** - [x] ran **pre-commit** checks for code style - [x] attended to any **TO-DOs** left in the code #3110 Final user guide comments. Related work items: #3110
Fix some issues with sphinx rendering text in jupyter notebooks
2025-03-17 09:09:59 +00:00 · 2025-03-14 16:07:08 +00:00 · 2025-03-14 16:01:55 +00:00 · 2025-03-14 16:00:30 +00:00 · 2025-03-14 14:58:36 +00:00 · 2025-03-14 14:57:33 +00:00
720 changed files with 146056 additions and 15399 deletions
--- a/.azure/azure-benchmark-pipeline.yaml
+++ b/.azure/azure-benchmark-pipeline.yaml
@@ -0,0 +1,108 @@
+trigger:
+- release/*
+
+schedules:
+- cron: "0 2 * * 1-5"  # Run at 2 AM every weekday
+  displayName: "Weekday Schedule"
+  branches:
+    include:
+    - 'refs/heads/dev'
+variables:
+  VERSION: ''
+  MAJOR_VERSION: ''
+
+jobs:
+- job: PrimAITE_Benchmark
+  timeoutInMinutes: 360 # 6-hour maximum
+  pool:
+    name: 'Imaginary Yak Pool'
+  workspace:
+    clean: all
+
+  steps:
+  - checkout: self
+    persistCredentials: true
+
+  - script: |
+      python3.10 -m venv venv
+    displayName: 'Create venv'
+
+  - script: |
+      VERSION=$(cat src/primaite/VERSION | tr -d '\n')
+      if [[ "$(Build.SourceBranch)" == "refs/heads/dev" ]]; then
+        DATE=$(date +%Y%m%d)
+        echo "${VERSION}+dev.${DATE}" > src/primaite/VERSION
+      fi
+    displayName: 'Update VERSION file for Dev Benchmark'
+
+  - script: |
+      VERSION=$(cat src/primaite/VERSION | tr -d '\n')
+      MAJOR_VERSION=$(echo $VERSION | cut -d. -f1)
+      echo "##vso[task.setvariable variable=VERSION]$VERSION"
+      echo "##vso[task.setvariable variable=MAJOR_VERSION]$MAJOR_VERSION"
+    displayName: 'Set Version Variables'
+
+  - script: |
+      source venv/bin/activate
+      pip install --upgrade pip
+      pip install -e .[dev,rl]
+      primaite setup
+    displayName: 'Install Dependencies'
+
+  - script: |
+      set -e
+      source venv/bin/activate
+      cd benchmark
+      python primaite_benchmark.py
+      cd ..
+    displayName: 'Run Benchmarking Script'
+
+  - script: |
+      tar czf primaite_v$(VERSION)_benchmark.tar.gz benchmark/results/v$(MAJOR_VERSION)/v$(VERSION)
+    displayName: 'Prepare Artifacts for Publishing'
+
+  - task: PublishPipelineArtifact@1
+    inputs:
+      targetPath: primaite_v$(VERSION)_benchmark.tar.gz
+      artifactName: 'benchmark-zip-output'
+      publishLocation: 'pipeline'
+    displayName: 'Publish Benchmark Output zip as Artifact'
+
+  - script: |
+      git config --global user.email "oss@dstl.gov.uk"
+      git config --global user.name "Defence Science and Technology Laboratory UK"
+    workingDirectory: $(System.DefaultWorkingDirectory)
+    displayName: 'Configure Git'
+    condition: and(succeeded(), startsWith(variables['Build.SourceBranch'], 'refs/heads/release'))
+
+  - script: |
+      echo "Fetching all branches..."
+      git fetch --all --prune
+
+      echo "Stashing files..."
+      git stash push -u
+
+      echo "Resolving branch name..."
+      # Extracting just the branch name from the full ref path
+      branch_name=$(echo "$(Build.SourceBranch)" | sed 's|refs/heads/||')
+      echo "Branch Name: $branch_name"
+
+      echo "Checking out branch $branch_name..."
+      git checkout $branch_name
+
+      echo "Popping stash..."
+      git stash pop
+
+      echo "Adding benchmark results..."
+      git add benchmark/results/v$(MAJOR_VERSION)/v$(VERSION)/*
+
+      echo "Committing changes..."
+      git commit -m "Automated benchmark output commit for version $(VERSION) [skip ci]"
+
+      echo "Pushing to remote..."
+      git push origin $branch_name
+    displayName: 'Commit and Push Benchmark Results'
+    workingDirectory: $(System.DefaultWorkingDirectory)
+    env:
+      GIT_CREDENTIALS: $(System.AccessToken)
+    condition: and(succeeded(), startsWith(variables['Build.SourceBranch'], 'refs/heads/release'))
--- a/.azure/azure-build-deploy-docs-pipeline.yml
+++ b/.azure/azure-build-deploy-docs-pipeline.yml
@@ -26,8 +26,12 @@ jobs:
    displayName: 'Install build dependencies'

  - script: |
-      pip install -e .[dev]
-    displayName: 'Install Yawning-Titan for docs autosummary'
+      pip install -e .[dev,rl]
+    displayName: 'Install PrimAITE for docs autosummary'
+
+  - script: |
+      sudo apt-get install pandoc
+    displayName: 'Install Pandoc'

  - script: |
      primaite setup
--- a/.azure/azure-ci-build-pipeline.yaml
+++ b/.azure/azure-ci-build-pipeline.yaml
@@ -6,45 +6,60 @@ trigger:
 - bugfix/*
 - release/*

+pr:
+  autoCancel: true
+  drafts: false
 parameters:
  # https://stackoverflow.com/a/70046417
  - name: matrix
    type: object
    default:
-    - job_name: 'UbuntuPython38'
-      py: '3.8'
+    - job_name: 'UbuntuPython39'
+      py: 'v3.9'
      img: 'ubuntu-latest'
      every_time: false
+      publish_coverage: false
    - job_name: 'UbuntuPython310'
-      py: '3.10'
+      py: 'v3.10'
      img: 'ubuntu-latest'
      every_time: true
-    - job_name: 'WindowsPython38'
-      py: '3.8'
+      publish_coverage: true
+    - job_name: 'UbuntuPython311'
+      py: 'v3.11'
+      img: 'ubuntu-latest'
+      every_time: false
+      publish_coverage: false
+    - job_name: 'WindowsPython39'
+      py: 'v3.9'
      img: 'windows-latest'
      every_time: false
-    - job_name: 'WindowsPython310'
-      py: '3.10'
+      publish_coverage: false
+    - job_name: 'WindowsPython311'
+      py: 'v3.11'
      img: 'windows-latest'
      every_time: false
-    - job_name: 'MacOSPython38'
-      py: '3.8'
+      publish_coverage: false
+    - job_name: 'MacOSPython39'
+      py: 'v3.9'
      img: 'macOS-latest'
      every_time: false
-    - job_name: 'MacOSPython310'
-      py: '3.10'
+      publish_coverage: false
+    - job_name: 'MacOSPython311'
+      py: 'v3.11'
      img: 'macOS-latest'
      every_time: false
+      publish_coverage: false

 stages:
  - stage: Test
    jobs:
    - ${{ each item in parameters.matrix }}:
      - job: ${{ item.job_name }}
+        timeoutInMinutes: 90
+        cancelTimeoutInMinutes: 1
        pool:
          vmImage: ${{ item.img }}
-
-        condition: or( eq(variables['Build.Reason'], 'PullRequest'), ${{ item.every_time }} )
+        condition: and(succeeded(), or( eq(variables['Build.Reason'], 'PullRequest'), ${{ item.every_time }} ))

        steps:
          - task: UsePythonVersion@0
@@ -53,7 +68,7 @@ stages:
            displayName: 'Use Python ${{ item.py }}'

          - script: |
-              python -m pip install pre-commit
+              python -m pip install pre-commit
              pre-commit install
              pre-commit run --all-files
            displayName: 'Run pre-commits'
@@ -61,7 +76,6 @@ stages:
          - script: |
              python -m pip install --upgrade pip==23.0.1
              pip install wheel==0.38.4 --upgrade
-              pip install setuptools==66 --upgrade
              pip install build==0.10.0
              pip install pytest-azurepipelines
            displayName: 'Install build dependencies'
@@ -72,12 +86,12 @@ stages:

          - script: |
              PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl)
-              python -m pip install $PRIMAITE_WHEEL[dev]
+              python -m pip install $PRIMAITE_WHEEL[dev,rl]
            displayName: 'Install PrimAITE'
            condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' ))

          - script: |
-              forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]"
+              forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev,rl]"
            displayName: 'Install PrimAITE'
            condition: eq( variables['Agent.OS'], 'Windows_NT' )

@@ -85,6 +99,55 @@ stages:
              primaite setup
            displayName: 'Perform PrimAITE Setup'

+          - task: UseDotNet@2
+            displayName: 'Install dotnet dependencies'
+            inputs:
+              packageType: 'sdk'
+              version: '2.1.x'
+
          - script: |
-              pytest -n 4
-            displayName: 'Run tests'
+              python run_test_and_coverage.py
+            displayName: 'Run tests and code coverage'
+
+          # Run the notebooks
+          - script: |
+              pytest --nbmake -n=auto src/primaite/notebooks --junit-xml=./notebook-tests/notebooks.xml
+              notebooks_exit_code=$?
+              # Fail step if exit code not equal to 0
+              if [ $notebooks_exit_code -ne 0 ]; then
+                exit 1
+              fi
+            displayName: 'Run notebooks on Linux and macOS'
+            condition: or(eq(variables['Agent.OS'], 'Linux'), eq(variables['Agent.OS'], 'Darwin'))
+
+          # Run notebooks
+          - script: |
+              pytest --nbmake -n=auto src/primaite/notebooks --junit-xml=./notebook-tests/notebooks.xml
+              set notebooks_exit_code=%ERRORLEVEL%
+              rem Fail step if exit code not equal to 0
+              if %notebooks_exit_code% NEQ 0 exit /b 1
+            displayName: 'Run notebooks on Windows'
+            condition: eq(variables['Agent.OS'], 'Windows_NT')
+
+          - task: PublishTestResults@2
+            condition: succeededOrFailed()
+            displayName: 'Publish Test Results'
+            inputs:
+              testRunner: JUnit
+              testResultsFiles: |  # one pattern per line; quotes inside a literal block would be literal
+                junit/**.xml
+                notebook-tests/**.xml
+              testRunTitle: 'Publish test results'
+              failTaskOnFailedTests: true
+
+          - publish: $(System.DefaultWorkingDirectory)/htmlcov/
+            # publish the html report - so we can debug the coverage if needed
+            condition: ${{ item.publish_coverage }} # should only be run once
+            artifact: coverage_report
+
+          - task: PublishCodeCoverageResults@2
+            # publish the code coverage so it can be viewed in the run coverage page
+            condition: ${{ item.publish_coverage }} # should only be run once
+            inputs:
+              codeCoverageTool: Cobertura
+              summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
--- a/.azuredevops/pull_request_template.md
+++ b/.azuredevops/pull_request_template.md
@@ -1,12 +1,16 @@
-## Summary
-*Replace this text with an explanation of what the changes are and how you implemented them. Can this impact any other parts of the codebase that we should keep in mind?*
-
-## Test process
-*How have you tested this (if applicable)?*
-
-## Checklist
- [ ] This PR is linked to a **work item**
- [ ] I have performed **self-review** of the code
- [ ] I have written **tests** for any new functionality added with this PR
- [ ] I have updated the **documentation** if this PR changes or adds functionality
- [ ] I have run **pre-commit** checks for code style
+## Summary
+*Replace this text with an explanation of what the changes are and how you implemented them. Can this impact any other parts of the codebase that we should keep in mind?*
+
+## Test process
+*How have you tested this (if applicable)?*
+
+## Checklist
+- [ ] PR is linked to a **work item**
+- [ ] **acceptance criteria** of linked ticket are met
+- [ ] performed **self-review** of the code
+- [ ] written **tests** for any new functionality added with this PR
+- [ ] updated the **documentation** if this PR changes or adds functionality
+- [ ] written/updated **design docs** if this PR implements new functionality
+- [ ] updated the **change log**
+- [ ] ran **pre-commit** checks for code style
+- [ ] attended to any **TO-DOs** left in the code
--- a/.flake8
+++ b/.flake8
@@ -9,5 +9,12 @@ extend-ignore =
    E712
    D401
    F811
+    ANN002
+    ANN003
+    ANN101
+    ANN102
 exclude =
    docs/source/*
+    tests/*
+suppress-none-returning=True
+suppress-dummy-args=True
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,41 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: "[BUG] - <bug title goes here>"
+labels: bug
+assignees: ''
+
+---
+
+### Describe the bug:
+
+A clear and concise description of what the bug is.
+
+### To Reproduce:
+
+Steps to reproduce the behaviour:
+
+1. Import '...'
+2. Instantiate '....'
+3. Pass to '....'
+4. Run '....'
+5. See error
+
+### Expected behaviour
+
+A clear and concise description of what you expected to happen.
+
+### Screenshots/Outputs
+
+If applicable, add screenshots to help explain your problem.
+
+### Environment (please complete the following information)
+
+ - **OS:** [e.g. Ubuntu 22.04]
+ - **Python:** [e.g. 3.10.11]
+ - **PrimAITE Version:** [e.g. v2.0.0]
+ - **Software:** [e.g. cli, Jupyter, PyCharm, VSCode etc.]
+
+### Additional context
+
+Add any other context about the problem here.
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,24 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: "[REQUEST] - <request title goes here>"
+labels: feature_request
+assignees: ''
+
+---
+
+### Is your feature request related to a problem?
+
+If so, please give a concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+### Describe the solution you'd like:
+
+A clear and concise description of what you want to happen.
+
+### Describe alternatives you've considered:
+
+A clear and concise description of any alternative solutions or features you've considered.
+
+### Additional context:
+
+Add any other context or screenshots about the feature request here.
--- a/.github/workflows/build-sphinx.yml
+++ b/.github/workflows/build-sphinx.yml
@@ -0,0 +1,60 @@
+name: build-sphinx-to-github-pages
+
+env:
+  GITHUB_ACTOR: Autonomous-Resilient-Cyber-Defence
+  GITHUB_REPOSITORY: Autonomous-Resilient-Cyber-Defence/PrimAITE
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN}}
+
+on:
+  push:
+    branches: [main]
+
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install python dev
+        run: |
+          set -x
+          sudo apt-get update
+          sudo add-apt-repository ppa:deadsnakes/ppa -y
+          sudo apt install python${{ matrix.python-version}}-dev -y
+
+      - name: Install Git
+        run: |
+          set -x
+          sudo apt-get install -y git
+        shell: bash
+
+      - name: Set pip, wheel, setuptools versions
+        run: |
+          python -m pip install --upgrade pip==23.0.1
+          pip install wheel==0.38.4 --upgrade
+          pip install setuptools==66 --upgrade
+          pip install build
+
+      - name: Install PrimAITE for docs autosummary
+        run: |
+          set -x
+          python -m pip install -e .[dev,rl]
+
+      - name: Run build script for Sphinx pages
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+        run: |
+          set -x
+          bash $PWD/docs/build-sphinx-docs-to-github-pages.sh
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -0,0 +1,57 @@
+name: Python package
+
+on:
+  push:
+    branches:
+      - main
+      - dev
+      - 'release/**'
+  pull_request:
+    branches:
+      - main
+      - dev
+      - 'release/**'
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install python dev
+        run: |
+          sudo apt update
+          sudo add-apt-repository ppa:deadsnakes/ppa -y
+          sudo apt install python${{ matrix.python-version}}-dev -y
+
+      - name: Install Build Dependencies
+        run: |
+          python -m pip install --upgrade pip==23.0.1
+          pip install wheel==0.38.4 --upgrade
+          pip install setuptools==66 --upgrade
+          pip install build
+
+      - name: Build PrimAITE
+        run: |
+          python -m build
+
+      - name: Install PrimAITE
+        run: |
+          PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl)
+          python -m pip install $PRIMAITE_WHEEL[dev,rl]
+
+      - name: Perform PrimAITE Setup
+        run: |
+          primaite setup
+
+      - name: Run tests
+        run: |
+          pytest tests/
--- a/.gitignore
+++ b/.gitignore
@@ -37,6 +37,7 @@ pip-log.txt
 pip-delete-this-directory.txt

 # Unit test / coverage reports
+junit/
 htmlcov/
 .tox/
 .nox/
@@ -53,6 +54,7 @@ cover/
 tests/assets/**/*.png
 tests/assets/**/tensorboard_logs/
 tests/assets/**/checkpoints/
+notebook-tests/*.xml

 # Translations
 *.mo
@@ -81,6 +83,10 @@ target/

 # Jupyter Notebook
 .ipynb_checkpoints
+PPO_UC2/
+# ignore everything but the executed notebooks rst in the docs/source/notebooks directory
+docs/source/notebooks/**/*
+!docs/source/notebooks/executed_notebooks.rst

 # IPython
 profile_default/
@@ -143,10 +149,23 @@ cython_debug/

 # IDE
 .idea/
-docs/source/primaite-dependencies.rst
+
+.vscode/

 # outputs
 src/primaite/outputs/
+simulation_output/
+sessions/
+PrimAITE-PPO-example-agent.zip

 # benchmark session outputs
 benchmark/output
+# src/primaite/notebooks/scratch.ipynb
+src/primaite/notebooks/scratch.py
+sandbox.py
+sandbox/
+sandbox.ipynb
+
+# benchmarking
+**/benchmark/sessions/
+**/benchmark/output/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,12 +1,21 @@
 repos:
+  - repo: local
+    hooks:
+      - id: ensure-copyright-clause
+        name: ensure copyright clause
+        entry: python copyright_clause_pre_commit_hook.py
+        language: python
  - repo: http://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-yaml
+        exclude: |  # (?x) = verbose regex, so layout whitespace/newlines are ignored
+          (?x) scenario_with_placeholders/
+          | mini_scenario_with_simulation_variation/
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: check-added-large-files
-        args: ['--maxkb=1000']
+        args: ['--maxkb=5000']
      - id: mixed-line-ending
      - id: requirements-txt-fixer
  - repo: http://github.com/psf/black
@@ -22,8 +31,13 @@ repos:
      - id: isort
        args: [ "--profile", "black" ]
  - repo: http://github.com/PyCQA/flake8
-    rev: 6.0.0
+    rev: 6.1.0
    hooks:
      - id: flake8
        additional_dependencies:
          - flake8-docstrings
+          - flake8-annotations
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.7.1
+    hooks:
+      - id: nbstripout
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,89 +2,258 @@

 All notable changes to this project will be documented in this file.

-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## [Unreleased]
+## [4.0.0] - 2025-03-XX
+
+### Added
+-   Log observation space data by episode and step.
+-   Added ability to set the observation threshold for NMNE, file access and application executions.
+-   Added `show_history` method to Agents, allowing you to view actions taken by an agent per step. By default, `do-nothing` actions are omitted.
+-   New ``node-send-local-command`` action implemented which grants agents the ability to execute commands locally. (Previously limited to remote only)
+-   Added ability to set the observation threshold for NMNE, file access and application executions
+-   UC7 Scenario model changes including Threat Actor Profile, TAP001 and TAP003 agents plus config files and example notebooks.
+-   New how-to guides describing how to use the new extension system to customise actions, environments and rewards.
+-   Added version and plugin fields to YAML configs to ensure compatibility with future versions.
+-   Network Node Adder class provides a framework for adding nodes to a network in a standardised way.
+
+### Changed
+-   ACLs are no longer applied to layer-2 traffic.
+-   Random number seed values are recorded in simulation/seed.log if the seed is set in the config file
+    or `generate_seed_value` is set to `true`.
+-   ARP .show() method will now include the port number associated with each entry.
+-   The behaviour that services, applications, files and folders require scanning before their observations are updated is now optional.
+-   Updated the `Terminal` class to provide response information when sending remote command execution.
+-   Agents now follow a common configuration format, simplifying the configuration of agents and their extensibility.
+-   Actions within PrimAITE are now extensible, allowing for plugin support.
+-   Added a config schema to `ObservationManager`, `ActionManager`, and `RewardFunction`.
+-   Streamlined the way agents are created from config
+-   Agent config no longer requires a dummy action space if the action space is empty, the same applies for observation space and reward function
+-   Actions now support a config schema, to allow yaml data validation and default parameter values
+-   Action parameters are no longer defined through IDs, instead meaningful data is provided directly in the action map
+-   Test and example YAMLs have been updated to match the new agent and action schemas, such as:
+    -   Removed empty action spaces, observation spaces, or reward spaces for agent which didn't use them
+    -   Relabelled action parameters to match the new action config schemas, and updated the values to no longer rely on indices
+    -   Removed action space options which were previously used for assigning meaning to action space IDs
+-   Updated tests that don't use YAMLs to still use the new action and agent schemas
+-   Nodes now use a config schema and are extensible, allowing for plugin support.
+-   Node tests have been updated to use the new node config schemas when not using YAML files.
+-   Documentation has been updated to include details of extensibility with PrimAITE.
+-   Software is created in the GOOD health state instead of UNUSED.
+-   Standardised naming convention for YAML config files using kebab-case.
+    This naming convention is used for configuring software, observations, actions and node types.
+    NB: A migration guide will be available with this release.
+
+### Fixed
+-   DNS client no longer fails to check its cache if a DNS server address is missing.
+-   DNS client now correctly inherits the node's DNS address configuration setting.
+-   ACL observations now include the ACL at index 0.
+-   SoftwareManager.show() correctly displays all the software associated with a port whether the software is listening or not.
+
+
+## [3.3.0] - 2024-09-04
+### Added
+-   Random Number Generator Seeding by specifying a random number seed in the config file.
+-   Implemented Terminal service class, providing a generic terminal simulation.
+-   Added `User`, `UserManager` and `UserSessionManager` to enable the creation of user accounts and login on Nodes.
+-   Added actions to establish SSH connections, send commands remotely and terminate SSH connections.
+-   Added actions to change users' passwords.
+-   Added a `listen_on_ports` set in the `IOSoftware` class to enable software listening on ports in addition to the
+    main port they're assigned.
+-   Added two new red applications: ``C2Beacon`` and ``C2Server`` which aim to simulate malicious network infrastructure.
+    Refer to the ``Command and Control Application Suite E2E Demonstration`` notebook for more information.
+-   Added reward calculation details to AgentHistoryItem.
+-   Added a new Privilege-Escalation-and Data-Loss-Example.ipynb notebook with a realistic cyber scenario focusing on
+    internal privilege escalation and data loss through the manipulation of SSH access and Access Control Lists (ACLs).
+-   Added a new extensible `NetworkNodeAdder` class for convenient addition of sets of nodes based on a simplified config.
+
+### Changed
+-   File and folder observations can now be configured to always show the true health status, or require scanning like before.
+-   It's now possible to disable stickiness on reward components, meaning their value returns to 0 during timesteps where agents don't issue the corresponding action. Affects `GreenAdminDatabaseUnreachablePenalty`, `WebpageUnavailablePenalty`, `WebServer404Penalty`
+-   Node observations can now be configured to show the number of active local and remote logins.
+-   Ports and IP Protocols no longer use enums. They are defined in dictionary lookups and are handled by custom validation to enable extensibility with plugins.
+-   Changed AirSpaceFrequency to a data transfer object with a registry to allow extensibility
+-   Changed the Office LAN creation convenience function to follow the new `NetworkNodeAdder` pattern. Office LANs can now also be defined in YAML config.
+
+### Fixed
+-   Folder observations showing the true health state without scanning (the old behaviour can be reenabled via config)
+-   Updated `SoftwareManager` `install` and `uninstall` to handle all functionality that was being done at the `install`
+    and `uninstall` methods in the `Node` class.
+-   Updated the `receive_payload_from_session_manager` method in `SoftwareManager` so that it now sends a copy of the
+    payload to any software listening on the destination port of the `Frame`.
+-   Made the `show` method of `Network` show all node types, including ones registered at runtime
+
+### Removed
+-   Removed the `install` and `uninstall` methods in the `Node` class.
+
+
+## [3.2.0] - 2024-07-18
+
+### Added
+-   Action penalty is a reward component that applies a negative reward for doing any action other than DONOTHING
+-   Application configuration actions for RansomwareScript, DatabaseClient, and DoSBot applications
+-   Ability to configure how long it takes to apply the service fix action
+-   Terminal service using SSH
+-   Airspaces now track the amount of data being transmitted, viewable using the `show_bandwidth_load` method
+-   Tests to verify that airspace bandwidth is applied correctly and can be configured via YAML
+-   Agent logging for agents' internal decision logic
+-   Action masking in all PrimAITE environments
+### Changed
+-   Application registry was moved to the `Application` class and now updates automatically when Application is subclassed
+-   Databases can no longer respond to requests while performing a backup
+-   Application install no longer accepts an `ip_address` parameter
+-   Application install action can now be used on all applications
+-   Actions have additional logic for checking validity
+-   Frame `size` attribute now includes both core size and payload size in bytes
+-   The `speed` attribute of `NetworkInterface` has been changed from `int` to `float`
+-   Tidied up CHANGELOG
+-   Enhanced `AirSpace` logic to block transmissions that would exceed the available capacity.
+-   Updated `_can_transmit` function in `Link` to account for current load and total bandwidth capacity, ensuring transmissions do not exceed limits.
+
+### Fixed
+-   Links and airspaces can no longer transmit data if this would exceed their bandwidth
+
+
+## [3.1.0] - 2024-06-25
+
+### Added
+-   Observations for traffic amounts on host network interfaces
+-   NMAP application network discovery, including ping scan and port scan
+-   NMAP actions
+-   Automated adding copyright notices to source files
+-   More file types
+-   `show` method to files
+-   `model_dump` methods to network enums to enable better logging
+
+### Changed
+-   Updated file system actions to stop failures when creating duplicate files
+-   Improved parsing of ACL add rule actions to make some parameters optional
+
+### Fixed
+-   Fixed database client uninstall failing due to persistent connections
+-   Fixed packet storm when pinging broadcast addresses
+
+
+## [3.0.0] - 2024-06-10
+
+### Added
+-   New simulation module
+-   Multi agent reinforcement learning support
+-   File system class to manage files and folders
+-   Software for nodes that can have its own behaviour
+-   Software classes to model FTP, Postgres databases, web traffic, NTP
+-   Much more detailed network simulation including packets, links, and network interfaces
+-   More node types: host, computer, server, router, switch, wireless router, and firewalls
+-   Network Hardware - NIC, SwitchPort, Node, and Link. Nodes have fundamental services like ARP, ICMP, and PCAP running on them by default.
+-   Malicious network event detection
+-   New `game` module for managing agents
+-   ACL rule wildcard masking
+-   Network broadcasting
+-   Wireless transmission
+-   More detailed documentation
+-   Example jupyter notebooks to demonstrate new functionality
+-   More reward components
+-   Packet capture logs
+-   Node system logs
+-   Per-step full simulation state log
+-   Attack randomisation with respect to timing and attack source
+-   Ability to set log level via CLI
+-   Ability to vary the YAML configuration per-episode
+-   Developer CLI tools for enhanced debugging (with `primaite dev-mode enable`)
+-   `show` function to many simulation objects to inspect their current state
+
+### Changed
+-   Decoupled the environment from the simulation by adding the `game` interface layer
+-   Made agents share a common base class
+-   Added more actions
+-   Made all agents use CAOS actions, including red and green agents
+-   Reworked YAML configuration file schema
+-   Reworked the reward system to be component-based
+-   Changed agent logs to create a JSON output instead of CSV with more detailed action information
+-   Made observation space flattening optional
+-   Made all logging optional
+-   Agent actions now provide responses with a success code
+
+### Removed
+-   Legacy simulation modules
+-   Legacy training modules
+-   Tests for legacy code
+-   Hardcoded IERs and PoL, traffic generation is now handled by agents and software
+-   Inbuilt agent training scripts
+

 ## [2.0.0] - 2023-07-26

 ### Added
- Command Line Interface (CLI) for easy access and streamlined usage of PrimAITE.
- Application Directories to enable PrimAITE as a Python package with predefined directories for storage.
- Support for Ray Rllib, allowing training of PPO and A2C agents using Stable Baselines3 and Ray RLlib.
- Random Red Agent to train the blue agent against, with options for randomised Red Agent `POL` and `IER`.
- Repeatability of sessions through seed settings, and deterministic or stochastic evaluation options.
- Session loading to revisit previously run sessions for SB3 Agents.
- Agent Session Classes (`AgentSessionABC` and `HardCodedAgentSessionABC`) to standardise agent training with a common interface.
- Standardised Session Output in a structured format in the user's app sessions directory, providing four types of outputs:
-  1. Session Metadata
-  2. Results
-  3. Diagrams
-  4. Saved agents (training checkpoints and a final trained agent).
- Configurable Observation Space managed by the `ObservationHandler` class for a more flexible observation space setup.
- Benchmarking of PrimAITE performance, showcasing session and step durations for reference.
- Documentation overhaul, including automatic API and test documentation with recursive Sphinx auto-summary, using the Furo theme for responsive light/dark theme, and enhanced navigation with `sphinx-code-tabs` and `sphinx-copybutton`.
+-   Command Line Interface (CLI) for easy access and streamlined usage of PrimAITE.
+-   Application Directories to enable PrimAITE as a Python package with predefined directories for storage.
+-   Support for Ray RLlib, allowing training of PPO and A2C agents using Stable Baselines3 and Ray RLlib.
+-   Random Red Agent to train the blue agent against, with options for randomised Red Agent `POL` and `IER`.
+-   Repeatability of sessions through seed settings, and deterministic or stochastic evaluation options.
+-   Session loading to revisit previously run sessions for SB3 Agents.
+-   Agent Session Classes (`AgentSessionABC` and `HardCodedAgentSessionABC`) to standardise agent training with a common interface.
+-   Standardised Session Output in a structured format in the user's app sessions directory, providing four types of outputs: Session Metadata, Results, Diagrams, Trained agents.
+-   Configurable Observation Space managed by the `ObservationHandler` class for a more flexible observation space setup.
+-   Benchmarking of PrimAITE performance, showcasing session and step durations for reference.
+-   Documentation overhaul, including automatic API and test documentation with recursive Sphinx auto-summary, using the Furo theme for responsive light/dark theme, and enhanced navigation with `sphinx-code-tabs` and `sphinx-copybutton`.

 ### Changed
- Action Space updated to discrete spaces, introducing a new `ANY` action space option for combined `NODE` and `ACL` actions.
- Improved `Node` attribute naming convention for consistency, now adhering to `Pascal Case`.
- Package Structure has been refactored for better build, distribution, and installation, with all source code now in the `src/` directory, and the `PRIMAITE` Python package renamed to `primaite` to adhere to PEP-8 Package & Module Names.
- Docs and Tests now sit outside the `src/` directory.
- Non-python files (example config files, Jupyter notebooks, etc.) now sit inside a `*/_package_data/` directory in their respective sub-packages.
- All dependencies are now defined in the `pyproject.toml` file.
- Introduced individual configuration for the number of episodes and time steps for training and evaluation sessions, with separate config values for each.
- Decoupled the lay down config file from the training config, allowing more flexibility in configuration management.
- Updated `Transactions` to only report pre-action observation, improving the CSV header and providing more human-readable descriptions for columns relating to observations.
- Changes to `AccessControlList`, where the `acl` dictionary is now a list to accommodate changes to ACL action space and positioning of `ACLRules` inside the list to signal their level of priority.
+-   Action Space updated to discrete spaces, introducing a new `ANY` action space option for combined `NODE` and `ACL` actions.
+-   Improved `Node` attribute naming convention for consistency, now adhering to `Pascal Case`.
+-   Package Structure has been refactored for better build, distribution, and installation, with all source code now in the `src/` directory, and the `PRIMAITE` Python package renamed to `primaite` to adhere to PEP-8 Package & Module Names.
+-   Docs and Tests now sit outside the `src/` directory.
+-   Non-python files (example config files, Jupyter notebooks, etc.) now sit inside a `*/_package_data/` directory in their respective sub-packages.
+-   All dependencies are now defined in the `pyproject.toml` file.
+-   Introduced individual configuration for the number of episodes and time steps for training and evaluation sessions, with separate config values for each.
+-   Decoupled the lay down config file from the training config, allowing more flexibility in configuration management.
+-   Updated `Transactions` to only report pre-action observation, improving the CSV header and providing more human-readable descriptions for columns relating to observations.
+-   Changes to `AccessControlList`, where the `acl` dictionary is now a list to accommodate changes to ACL action space and positioning of `ACLRules` inside the list to signal their level of priority.


 ### Fixed
- Various bug fixes, including Green IERs separation, correct clearing of links in the reference environment, and proper reward calculation.
- Logic to check if a node is OFF before executing actions on the node by the blue agent, preventing erroneous state changes.
- Improved functionality of Resetting a Node, adding "SHUTTING DOWN" and "BOOTING" operating states for more reliable reset commands.
- Corrected the order of actions in the `Primaite` env to ensure the blue agent uses the current state for decision-making.
+-   Various bug fixes, including Green IERs separation, correct clearing of links in the reference environment, and proper reward calculation.
+-   Logic to check if a node is OFF before executing actions on the node by the blue agent, preventing erroneous state changes.
+-   Improved functionality of Resetting a Node, adding "SHUTTING DOWN" and "BOOTING" operating states for more reliable reset commands.
+-   Corrected the order of actions in the `Primaite` env to ensure the blue agent uses the current state for decision-making.
+

 ## [1.1.1] - 2023-06-27

-### Bug Fixes
-* Fixed bug whereby 'reference' environment links reach bandwidth capacity and are never cleared due to green & red IERs being applied to them. This bug had a knock-on effect that meant IERs were being blocked based on the full capacity of links on the reference environment which was not correct; they should only be based on the link capacity of the 'live' environment. This fix has been addressed by:
-  * Implementing a reference copy of all green IERs (`self.green_iers_reference`).
-  * Clearing the traffic on reference IERs at the same time as the live IERs.
-  * Passing the `green_iers_reference` to the `apply_iers` function at the reference stage.
-  * Passing the `green_iers_reference` as an additional argument to `calculate_reward_function`.
-  * Updating the green IERs section of the `calculate_reward_function` to now take into account both the green reference IERs and live IERs. The `green_ier_blocked` reward is only applied if the IER is blocked in the live environment but is running in the reference environment.
-  * Re-ordering the actions taken as part of the step function to ensure the blue action happens first before other changes.
-  * Removing the unnecessary "Reapply PoL and IERs" action from the step function.
-  * Moving the deep-copy of nodes and links to below the "Implement blue action" stage of the step function.
+### Fixed
+-   Fixed bug whereby 'reference' environment links reach bandwidth capacity and are never cleared due to green & red IERs being applied to them. This bug had a knock-on effect that meant IERs were being blocked based on the full capacity of links on the reference environment which was not correct; they should only be based on the link capacity of the 'live' environment. This fix has been addressed by:
+    -   Implementing a reference copy of all green IERs (`self.green_iers_reference`).
+    -   Clearing the traffic on reference IERs at the same time as the live IERs.
+    -   Passing the `green_iers_reference` to the `apply_iers` function at the reference stage.
+    -   Passing the `green_iers_reference` as an additional argument to `calculate_reward_function`.
+    -   Updating the green IERs section of the `calculate_reward_function` to now take into account both the green reference IERs and live IERs. The `green_ier_blocked` reward is only applied if the IER is blocked in the live environment but is running in the reference environment.
+    -   Re-ordering the actions taken as part of the step function to ensure the blue action happens first before other changes.
+    -   Removing the unnecessary "Reapply PoL and IERs" action from the step function.
+    -   Moving the deep-copy of nodes and links to below the "Implement blue action" stage of the step function.
+

 ## [1.1.0] - 2023-03-13

 ### Added
-* The user can now initiate either a TRAINING session or an EVALUATION (test) session with the Stable Baselines 3 (SB3) agents via the config_main.yaml file. During evaluation/testing, the agent policy will be fixed (no longer learning) and subjected to the SB3 `evaluate_policy()` function.
-* The user can choose whether a saved agent is loaded into the session (with reference to a URL) via the `config_main.yaml` file. They specify a Boolean true/false indicating whether a saved agent should be loaded, and specify the URL and file name.
-* Active and Service nodes now possess a new "File System State" attribute. This attribute is permitted to have the states GOOD, CORRUPT, DESTROYED, REPAIRING, and RESTORING. This new feature affects the following components:
-  * Blue agent observation space;
-  * Blue agent action space;
-  * Reward function;
-  * Node pattern-of-life.
-* The Red Agent node pattern-of-life has been enhanced so that node PoL is triggered by an 'initiator'. The initiator is either DIRECT (state change is applied to the node without any conditions), IER (state change is applied to the node based on IER entry condition), or SERVICE (state change is applied to the node based on a service state condition on the same node or a different node within the network).
-* New default config named "config_5_DATA_MANIPULATION.yaml" and associated Training Use Case Profile.
-* NodeStateInstruction has been split into `NodeStateInstructionGreen` and `NodeStateInstructionRed` to reflect the changes within the red agent pattern-of-life capability.
-* The reward function has been enhanced so that node attribute states of resetting, patching, repairing, and restarting contribute to the overall reward value.
-* The User Guide has been updated to reflect all the above changes.
+-   The user can now initiate either a TRAINING session or an EVALUATION (test) session with the Stable Baselines 3 (SB3) agents via the config_main.yaml file. During evaluation/testing, the agent policy will be fixed (no longer learning) and subjected to the SB3 `evaluate_policy()` function.
+-   The user can choose whether a saved agent is loaded into the session (with reference to a URL) via the `config_main.yaml` file. They specify a Boolean true/false indicating whether a saved agent should be loaded, and specify the URL and file name.
+-   Active and Service nodes now possess a new "File System State" attribute. This attribute is permitted to have the states GOOD, CORRUPT, DESTROYED, REPAIRING, and RESTORING. This new feature affects the following components:
+    -   Blue agent observation space;
+    -   Blue agent action space;
+    -   Reward function;
+    -   Node pattern-of-life.
+-   The Red Agent node pattern-of-life has been enhanced so that node PoL is triggered by an 'initiator'. The initiator is either DIRECT (state change is applied to the node without any conditions), IER (state change is applied to the node based on IER entry condition), or SERVICE (state change is applied to the node based on a service state condition on the same node or a different node within the network).
+-   New default config named "config_5_DATA_MANIPULATION.yaml" and associated Training Use Case Profile.
+-   NodeStateInstruction has been split into `NodeStateInstructionGreen` and `NodeStateInstructionRed` to reflect the changes within the red agent pattern-of-life capability.
+-   The reward function has been enhanced so that node attribute states of resetting, patching, repairing, and restarting contribute to the overall reward value.
+-   The User Guide has been updated to reflect all the above changes.

 ### Changed
-* "config_1_DDOS_BASIC.yaml" modified to make it more simplistic to aid evaluation testing.
-* "config_2_DDOS_BASIC.yaml" updated to reflect the addition of the File System State and the Red Agent node pattern-of-life enhancement.
-* "config_3_DOS_VERY_BASIC.yaml" updated to reflect the addition of the File System State and the Red Agent node pattern-of-life enhancement.
-* "config_UNIT_TEST.yaml" is a copy of the new "config_5_DATA_MANIPULATION.yaml" file.
-* Updates to Transactions.
+-   "config_1_DDOS_BASIC.yaml" modified to make it more simplistic to aid evaluation testing.
+-   "config_2_DDOS_BASIC.yaml" updated to reflect the addition of the File System State and the Red Agent node pattern-of-life enhancement.
+-   "config_3_DOS_VERY_BASIC.yaml" updated to reflect the addition of the File System State and the Red Agent node pattern-of-life enhancement.
+-   "config_UNIT_TEST.yaml" is a copy of the new "config_5_DATA_MANIPULATION.yaml" file.
+-   Updates to Transactions.

 ### Fixed
-* Fixed "config_2_DDOS_BASIC.yaml" by adding another ACL rule to allow traffic to flow from Node 9 to Node 3. Previously, there was no rule, so one of the green IERs could not flow by default.
-
-
-
-[unreleased]: https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/compare/v2.0.0...HEAD
-[2.0.0]: https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/releases/tag/v2.0.0
+-   Fixed "config_2_DDOS_BASIC.yaml" by adding another ACL rule to allow traffic to flow from Node 9 to Node 3. Previously, there was no rule, so one of the green IERs could not flow by default.
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -0,0 +1,36 @@
+# How to contribute to PrimAITE?
+
+
+### **Did you find a bug?**
+
+
+* **Ensure the bug was not already reported** by searching on GitHub under [Issues](https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/issues).
+* If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/issues/new?assignees=&labels=bug&projects=&template=bug_report.md&title=%5BBUG%5D+-+%3Cbug+title+goes+here%3E). Be sure to follow our bug report template with the headers **Describe the bug**, **To Reproduce**, **Expected behaviour**, **Screenshots/Outputs**, **Environment**, and **Additional context**.
+
+
+### **Do you have a solution to fix the bug?**
+
+* [Fork the repository](https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/fork).
+* Install the pre-commit hook with `pre-commit install`.
+* Implement the bug fix.
+* Commit the bug fix to the dev branch on your fork. If the bug has an open issue under [Issues](https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/issues), reference the issue in the commit message (e.g. #1 references issue 1).
+* Submit a pull request from your dev branch to the Autonomous-Resilient-Cyber-Defence/PrimAITE dev branch. Again, if the bug has an open issue under [Issues](https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/issues), reference the issue in the pull request description.
+
+### **Did you fix whitespace, format code, or make a purely cosmetic patch?**
+
+Changes that are cosmetic in nature and do not add anything substantial to the stability, functionality, or testability of PrimAITE will generally not be accepted.
+
+### **Do you intend to add a new feature or change an existing one?**
+
+* Submit a [feature request issue](https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/issues/new?assignees=&labels=feature_request&projects=&template=feature_request.md&title=%5BREQUEST%5D+-+%3Crequest+title+goes+here%3E).
+* Know how to implement the new feature or change? Follow the same steps in the bug fix section above to fork, build, document, test, commit, and submit a pull request.
+
+### **Do you have questions about the source code?**
+
+Ask any question about how to use PrimAITE in our discussions section.
+
+### **Do you want to contribute to the PrimAITE documentation?**
+
+Please follow the "Do you intend to add a new feature or change an existing one?" section above and tag your feature request issue and pull request with the documentation tag.
+
+Thank you from the PrimAITE dev team! 🙌
--- a/11
+++ b/11
@@ -1,21 +1,24 @@
-MIT License
+MIT License License

-Copyright (c) 2023 - 2025 Defence Science and Technology Laboratory UK (https://dstl.gov.uk)
+MIT License Conditions

-Permission is hereby granted, free of charge, to any person obtaining a copy
+These MIT License conditions confirm the provision of the following artefacts as MIT License by Defence Science and Technology
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
+
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
-
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/PrimAITE_logo_transparent.png
+++ b/PrimAITE_logo_transparent.png
--- a/README.md
+++ b/README.md
@@ -1,50 +1,163 @@
 # PrimAITE

+![image](./PrimAITE_logo_transparent.png)
+
+The ARCD Primary-level AI Training Environment (**PrimAITE**) provides an effective simulation capability for the purposes of training and evaluating AI in a cyber-defensive role. It incorporates the functionality required of a primary-level ARCD environment, which includes:
+
+- The ability to model a relevant platform / system context;
+
+- The ability to model key characteristics of a platform / system by representing connections, IP addresses, ports, traffic loading, operating systems and services;
+
+- Operates at machine-speed to enable fast training cycles.
+
+PrimAITE presents the following features:
+
+- Highly configurable (via YAML files) to provide the means to model a variety of platform / system laydowns and adversarial attack scenarios;
+
+- A Reinforcement Learning (RL) reward function based on (a) the ability to counter the specific modelled adversarial cyber-attack, and (b) the ability to ensure success;
+
+- Provision of logging to support AI evaluation and metrics gathering;
+
+- Realistic network traffic simulation, including addressing and sending packets via internet protocols like TCP, UDP, ICMP, and others
+
+- Routers with traffic routing and firewall capabilities
+
+- Support for multiple agents, each having their own customisable observation space, action space, and reward function definition, and either deterministic or RL-directed behaviour
+
+Whilst PrimAITE ships with a number of example modelled scenarios (a.k.a. Use Cases), it has not been developed to mandate the solving of a single cyber challenge, and instead provides a highly flexible environment application that can be extended and reconfigured by the user to suit their specific cyber defence training and evaluation needs. PrimAITE provides default networks, red agent and green agent behaviour, reward functions, and action / observation space configuration, all of which can be utilised out of the box, but which ultimately can (and in some instances should) be built upon and / or reconfigured to meet the needs of different defensive agent developers. The PrimAITE user guide provides comprehensive instruction on all PrimAITE features, functionality and components, and can be consulted in order to help guide users in any reconfiguration or enhancements they wish to undertake; a library of example Jupyter notebooks is also provided to support such work.
+
 ## Getting Started with PrimAITE

-### Pre-Requisites
-
-In order to get **PrimAITE** installed, you will need to have the following installed:
-
- `python3.8+`
- `python3-pip`
- `virtualenv`
-
+### 💫 Installation
 **PrimAITE** is designed to be OS-agnostic, and thus should work on most variations/distros of Linux, Windows, and MacOS.
+Currently, the PrimAITE wheel can only be installed from GitHub. This may change in the future with release to PyPI.

-### Installation from source
-#### 1. Navigate to the PrimAITE folder and create a new python virtual environment (venv)
+#### Windows (PowerShell)

-```unix
-python3 -m venv <name_of_venv>
+**Prerequisites:**
+* Manual install of Python >= 3.9 < 3.12
+
+**Install:**
+
+``` powershell
+mkdir ~\primaite
+cd ~\primaite
+python3 -m venv .venv
+attrib +h .venv /s /d # Hides the .venv directory
+.\.venv\Scripts\activate
+pip install primaite-{VERSION}-py3-none-any.whl[rl]
+primaite setup
 ```

-#### 2. Activate the venv
+
+#### Unix
+
+**Prerequisites:**
+* Manual install of Python >= 3.8 < 3.12
+
+``` bash
+sudo add-apt-repository ppa:deadsnakes/ppa
+sudo apt install python3.10
+sudo apt-get install python3-pip
+sudo apt-get install python3-venv
+```
+**Install:**
+
+``` bash
+mkdir ~/primaite
+cd ~/primaite
+python3 -m venv .venv
+source .venv/bin/activate
+pip install primaite-{VERSION}-py3-none-any.whl[rl]
+primaite setup
+```
+
+
+
+### Developer Install from Source
+To make your own changes to PrimAITE, perform the install from source (developer install).
+
+#### 1. Clone the PrimAITE repository
+``` unix
+git clone git@github.com:Autonomous-Resilient-Cyber-Defence/PrimAITE.git
+```
+
+#### 2. CD into the repo directory
+``` unix
+cd PrimAITE
+```
+#### 3. Create a new python virtual environment (venv)
+
+```unix
+python3 -m venv venv
+```
+
+#### 4. Activate the venv

 ##### Unix
 ```bash
-source <name_of_venv>/bin/activate
+source venv/bin/activate
 ```

-##### Windows
+##### Windows (PowerShell)
 ```powershell
-.\<name_of_venv>\Scripts\activate
+.\venv\Scripts\activate
 ```

-#### 3. Install `primaite` into the venv along with all of it's dependencies
+#### 5. Install `primaite` with the dev extra into the venv along with all of its dependencies

 ```bash
-python3 -m pip install -e .
+python3 -m pip install -e .[dev,rl]
 ```

-### Development Installation
-To install the development dependencies, postfix the command in step 3 above with the `[dev]` extra. Example:
+#### 6. Perform the PrimAITE setup:

 ```bash
-python3 -m pip install -e .[dev]
+primaite setup
 ```

-## Building documentation
+#### Note
+*It is possible to install PrimAITE without Ray RLlib, Stable Baselines3, or any deep learning libraries by omitting the `rl` extra in the pip install command.*
+
+### Running PrimAITE
+
+Use the provided Jupyter notebooks as a starting point to try running PrimAITE. They are automatically copied to your PrimAITE notebook folder when you run `primaite setup`.
+
+#### 1. Activate the virtual environment
+
+##### Windows (PowerShell)
+```powershell
+.\venv\Scripts\activate
+```
+
+##### Unix
+```bash
+source venv/bin/activate
+```
+
+#### 2. Open jupyter notebook
+
+```bash
+python -m jupyter notebook
+```
+Then, click the URL provided by the jupyter command to open the jupyter application in your browser. You can also open notebooks in your IDE if supported.
+
+## 📚 Documentation
+
+### Prerequisites
+
+Building the documentation requires the installation of Pandoc.
+
+##### Unix
+```bash
+sudo apt-get install pandoc
+```
+
+##### Other operating systems
+Follow the steps in https://pandoc.org/installing.html
+
+### Building the documentation
+
 The PrimAITE documentation can be built with the following commands:

 ##### Unix
@@ -53,12 +166,12 @@ cd docs
 make html
 ```

-##### Windows
+##### Windows (PowerShell)
 ```powershell
 cd docs
 .\make.bat html
 ```

-This will build the documentation as a collection of HTML files which uses the Read The Docs sphinx theme. Other build
-options are available but may require additional dependencies such as LaTeX and PDF. Please refer to the Sphinx documentation
-for your specific output requirements.
+
+## Example notebooks
+Check out the example notebooks to learn more about how PrimAITE works and how you can use it to train agents. They are automatically copied to your primaite installation directory when you run `primaite setup`.
--- a/_config.yml
+++ b/_config.yml
@@ -0,0 +1,3 @@
+# Used by nbmake to change build pipeline notebook timeout
+execute:
+  timeout: 600
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -0,0 +1,22 @@
+# © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+from typing import Any, Dict, Optional, Tuple
+
+from gymnasium.core import ObsType
+
+from primaite.session.environment import PrimaiteGymEnv
+
+
+class BenchmarkPrimaiteGymEnv(PrimaiteGymEnv):
+    """
+    PrimaiteGymEnv subclass that keeps a running total of simulated time steps.
+
+    The reset method is extended so that the game's step counter is added to ``total_time_steps`` before resetting.
+    """
+
+    total_time_steps: int = 0
+
+    def reset(self, seed: Optional[int] = None) -> Tuple[ObsType, Dict[str, Any]]:
+        """Add the current game step counter to ``total_time_steps``, then defer to the PrimaiteGymEnv reset."""
+        self.total_time_steps += self.game.step_counter
+
+        return super().reset(seed=seed)
--- a/benchmark/primaite_benchmark.py
+++ b/benchmark/primaite_benchmark.py
@@ -1,206 +1,93 @@
-# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+# © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
 import json
-import platform
 import shutil
-import sys
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, Final, Optional, Tuple, Union
-from unittest.mock import patch
+from typing import Any, Dict, Final, Tuple

-import GPUtil
-import plotly.graph_objects as go
-import polars as pl
-import psutil
-import yaml
-from plotly.graph_objs import Figure
-from pylatex import Command, Document
-from pylatex import Figure as LatexFigure
-from pylatex import Section, Subsection, Tabular
-from pylatex.utils import bold
+from report import build_benchmark_md_report, md2pdf
+from stable_baselines3 import PPO

 import primaite
-from primaite.config.lay_down_config import data_manipulation_config_path
-from primaite.data_viz.session_plots import get_plotly_config
-from primaite.environment.primaite_env import Primaite
-from primaite.primaite_session import PrimaiteSession
+from benchmark import BenchmarkPrimaiteGymEnv
+from primaite.config.load import data_manipulation_config_path

 _LOGGER = primaite.getLogger(__name__)

+_MAJOR_V = primaite.__version__.split(".")[0]
+
 _BENCHMARK_ROOT = Path(__file__).parent
-_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results"
-_RESULTS_ROOT.mkdir(exist_ok=True, parents=True)
+_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results" / f"v{_MAJOR_V}"
+_VERSION_ROOT: Final[Path] = _RESULTS_ROOT / f"v{primaite.__version__}"
+_SESSION_METADATA_ROOT: Final[Path] = _VERSION_ROOT / "session_metadata"

-_OUTPUT_ROOT: Final[Path] = _BENCHMARK_ROOT / "output"
-# Clear and recreate the output directory
-if _OUTPUT_ROOT.exists():
-    shutil.rmtree(_OUTPUT_ROOT)
-_OUTPUT_ROOT.mkdir()
-
-_TRAINING_CONFIG_PATH = _BENCHMARK_ROOT / "config" / "benchmark_training_config.yaml"
-_LAY_DOWN_CONFIG_PATH = data_manipulation_config_path()
+_SESSION_METADATA_ROOT.mkdir(parents=True, exist_ok=True)


-def get_size(size_bytes: int):
-    """
-    Scale bytes to its proper format.
+class BenchmarkSession:
+    """Benchmark Session class."""

-    e.g:
-        1253656 => '1.20MB'
-        1253656678 => '1.17GB'
+    gym_env: BenchmarkPrimaiteGymEnv
+    """Gym environment used by the session to train."""

-    :
-    """
-    factor = 1024
-    for unit in ["", "K", "M", "G", "T", "P"]:
-        if size_bytes < factor:
-            return f"{size_bytes:.2f}{unit}B"
-        size_bytes /= factor
+    num_episodes: int
+    """Number of episodes to run the training session."""

+    episode_len: int
+    """The number of steps per episode."""

-def _get_system_info() -> Dict:
-    """Builds and returns a dict containing system info."""
-    uname = platform.uname()
-    cpu_freq = psutil.cpu_freq()
-    virtual_mem = psutil.virtual_memory()
-    swap_mem = psutil.swap_memory()
-    gpus = GPUtil.getGPUs()
-    return {
-        "System": {
-            "OS": uname.system,
-            "OS Version": uname.version,
-            "Machine": uname.machine,
-            "Processor": uname.processor,
-        },
-        "CPU": {
-            "Physical Cores": psutil.cpu_count(logical=False),
-            "Total Cores": psutil.cpu_count(logical=True),
-            "Max Frequency": f"{cpu_freq.max:.2f}Mhz",
-        },
-        "Memory": {"Total": get_size(virtual_mem.total), "Swap Total": get_size(swap_mem.total)},
-        "GPU": [{"Name": gpu.name, "Total Memory": f"{gpu.memoryTotal}MB"} for gpu in gpus],
-    }
+    total_steps: int
+    """Number of steps to run the training session."""

+    batch_size: int
+    """Number of steps for each episode."""

-def _build_benchmark_latex_report(
-    benchmark_metadata_dict: Dict, this_version_plot_path: Path, all_version_plot_path: Path
-):
-    geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"}
-    data = benchmark_metadata_dict
-    primaite_version = data["primaite_version"]
+    learning_rate: float
+    """Learning rate for the model."""

-    # Create a new document
-    doc = Document("report", geometry_options=geometry_options)
-    # Title
-    doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark"))
-    doc.preamble.append(Command("author", "PrimAITE Dev Team"))
-    doc.preamble.append(Command("date", datetime.now().date()))
-    doc.append(Command("maketitle"))
+    start_time: datetime
+    """Start time for the session."""

-    sessions = data["total_sessions"]
-    episodes = data["training_config"]["num_train_episodes"]
-    steps = data["training_config"]["num_train_steps"]
-
-    # Body
-    with doc.create(Section("Introduction")):
-        doc.append(
-            f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics "
-            f"were captured to be referenced during system-level testing and user acceptance testing (UAT)."
-        )
-        doc.append(
-            f"\nThe benchmarking process consists of running {sessions} training session using the same "
-            f"training and lay down config files. Each session trains an agent for {episodes} episodes, "
-            f"with each episode consisting of {steps} steps."
-        )
-        doc.append(
-            f"\nThe mean reward per episode from each session is captured. This is then used to calculate a "
-            f"combined average reward per episode from the {sessions} individual sessions for smoothing. "
-            f"Finally, a 25-widow rolling average of the combined average reward per session is calculated for "
-            f"further smoothing."
-        )
-
-    with doc.create(Section("System Information")):
-        with doc.create(Subsection("Python")):
-            with doc.create(Tabular("|l|l|")) as table:
-                table.add_hline()
-                table.add_row((bold("Version"), sys.version))
-                table.add_hline()
-        for section, section_data in data["system_info"].items():
-            if section_data:
-                with doc.create(Subsection(section)):
-                    if isinstance(section_data, dict):
-                        with doc.create(Tabular("|l|l|")) as table:
-                            table.add_hline()
-                            for key, value in section_data.items():
-                                table.add_row((bold(key), value))
-                                table.add_hline()
-                    elif isinstance(section_data, list):
-                        headers = section_data[0].keys()
-                        tabs_str = "|".join(["l" for _ in range(len(headers))])
-                        tabs_str = f"|{tabs_str}|"
-                        with doc.create(Tabular(tabs_str)) as table:
-                            table.add_hline()
-                            table.add_row([bold(h) for h in headers])
-                            table.add_hline()
-                            for item in section_data:
-                                table.add_row(item.values())
-                                table.add_hline()
-
-    headers_map = {
-        "total_sessions": "Total Sessions",
-        "total_episodes": "Total Episodes",
-        "total_time_steps": "Total Steps",
-        "av_s_per_session": "Av Session Duration (s)",
-        "av_s_per_step": "Av Step Duration (s)",
-        "av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
-    }
-    with doc.create(Section("Stats")):
-        with doc.create(Subsection("Benchmark Results")):
-            with doc.create(Tabular("|l|l|")) as table:
-                table.add_hline()
-                for section, header in headers_map.items():
-                    if section.startswith("av_"):
-                        table.add_row((bold(header), f"{data[section]:.4f}"))
-                    else:
-                        table.add_row((bold(header), data[section]))
-                    table.add_hline()
-
-    with doc.create(Section("Graphs")):
-        with doc.create(Subsection(f"PrimAITE {primaite_version} Learning Benchmark Plot")):
-            with doc.create(LatexFigure(position="h!")) as pic:
-                pic.add_image(str(this_version_plot_path))
-                pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot")
-
-        with doc.create(Subsection("PrimAITE All Versions Learning Benchmark Plot")):
-            with doc.create(LatexFigure(position="h!")) as pic:
-                pic.add_image(str(all_version_plot_path))
-                pic.add_caption("PrimAITE All Versions Learning Benchmark Plot")
-
-    doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True)
-
-
-class BenchmarkPrimaiteSession(PrimaiteSession):
-    """A benchmarking primaite session."""
+    end_time: datetime
+    """End time for the session."""

    def __init__(
        self,
-        training_config_path: Union[str, Path],
-        lay_down_config_path: Union[str, Path],
+        gym_env: BenchmarkPrimaiteGymEnv,
+        episode_len: int,
+        num_episodes: int,
+        n_steps: int,
+        batch_size: int,
+        learning_rate: float,
    ):
-        super().__init__(training_config_path, lay_down_config_path)
-        self.setup()
+        """
+        Initialise the BenchmarkSession.
+
+        :param gym_env: The environment the agent is trained on.
+        :param episode_len: Number of steps per episode; multiplied by ``num_episodes`` to size the training run.
+        :param num_episodes: Number of episodes to train for.
+        :param n_steps: Value forwarded to PPO as ``n_steps`` when training.
+        :param batch_size: Value forwarded to PPO as ``batch_size`` when training.
+        :param learning_rate: Value forwarded to PPO as ``learning_rate`` when training.
+        """
+        self.gym_env = gym_env
+        self.episode_len = episode_len
+        self.n_steps = n_steps
+        self.num_episodes = num_episodes
+        # total number of timesteps requested from the learner in train()
+        self.total_steps = self.num_episodes * self.episode_len
+        self.batch_size = batch_size
+        self.learning_rate = learning_rate

-    @property
-    def env(self) -> Primaite:
-        """Direct access to the env for ease of testing."""
-        return self._agent_session._env  # noqa
+    def train(self):
+        """
+        Run the training session.
+
+        Trains a new PPO model on ``self.gym_env`` for ``self.total_steps`` timesteps, records the wall-clock
+        start and end times, and stores the resulting metadata in ``self.session_metadata``.
+        """
+        # start timer for session (model construction is included in the measured duration)
+        self.start_time = datetime.now()
+        model = PPO(
+            policy="MlpPolicy",
+            env=self.gym_env,
+            learning_rate=self.learning_rate,
+            n_steps=self.n_steps,
+            batch_size=self.batch_size,
+            verbose=0,
+            tensorboard_log="./PPO_UC2/",
+        )
+        model.learn(total_timesteps=self.total_steps)

-    def __enter__(self):
-        return self
+        # end timer for session
+        self.end_time = datetime.now()

-    def __exit__(self, type, value, tb):
-        shutil.rmtree(self.session_path)
-        _LOGGER.debug(f"Deleted benchmark session directory: {self.session_path}")
+        # must run after end_time is set: the metadata includes durations derived from both timestamps
+        self.session_metadata = self.generate_learn_metadata_dict()

    def _learn_benchmark_durations(self) -> Tuple[float, float, float]:
        """
@@ -214,235 +101,112 @@ class BenchmarkPrimaiteSession(PrimaiteSession):
        :return: The learning benchmark durations as a Tuple of three floats:
            Tuple[total_s, s_per_step, s_per_100_steps_10_nodes].
        """
-        data = self.metadata_file_as_dict()
-        start_dt = datetime.fromisoformat(data["start_datetime"])
-        end_dt = datetime.fromisoformat(data["end_datetime"])
-        delta = end_dt - start_dt
+        # wall-clock duration of train(); both timestamps are set there
+        delta = self.end_time - self.start_time
        total_s = delta.total_seconds()

-        total_steps = data["learning"]["total_time_steps"]
+        # Divide by the number of timesteps actually requested from the learner (num_episodes * episode_len).
+        # batch_size is the PPO minibatch size and is unrelated to how many environment steps were run, so
+        # using it here inflated the per-step figures by a factor of episode_len / batch_size.
+        total_steps = self.total_steps
        s_per_step = total_s / total_steps

-        num_nodes = self.env.num_nodes
+        num_nodes = len(self.gym_env.game.simulation.network.nodes)
        num_intervals = total_steps / 100
        av_interval_time = total_s / num_intervals
        s_per_100_steps_10_nodes = av_interval_time / (num_nodes / 10)

        return total_s, s_per_step, s_per_100_steps_10_nodes

-    def learn_metadata_dict(self) -> Dict[str, Any]:
+    def generate_learn_metadata_dict(self) -> Dict[str, Any]:
        """Metadata specific to the learning session."""
        total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations()
+        # NOTE(review): pop(0) mutates the environment's reward record in place, so this method is not
+        # idempotent - a second call on the same environment would discard a further entry or fail.
+        self.gym_env.total_reward_per_episode.pop(0)  # remove episode 0
        return {
-            "total_episodes": self.env.actual_episode_count,
-            "total_time_steps": self.env.total_step_count,
+            "total_episodes": self.gym_env.episode_counter,
+            "total_time_steps": self.gym_env.total_time_steps,
            "total_s": total_s,
            "s_per_step": s_per_step,
            "s_per_100_steps_10_nodes": s_per_100_steps_10_nodes,
-            "av_reward_per_episode": self.learn_av_reward_per_episode_dict(),
+            "total_reward_per_episode": self.gym_env.total_reward_per_episode,
        }


-def _get_benchmark_session_path(session_timestamp: datetime) -> Path:
-    return _OUTPUT_ROOT / session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
-
-
-def _get_benchmark_primaite_session() -> BenchmarkPrimaiteSession:
-    with patch("primaite.agents.agent_abc.get_session_path", _get_benchmark_session_path) as mck:
-        mck.session_timestamp = datetime.now()
-        return BenchmarkPrimaiteSession(_TRAINING_CONFIG_PATH, _LAY_DOWN_CONFIG_PATH)
-
-
-def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict) -> dict:
-    n = len(metadata_dict)
-    with open(_TRAINING_CONFIG_PATH, "r") as file:
-        training_config_dict = yaml.safe_load(file)
-    with open(_LAY_DOWN_CONFIG_PATH, "r") as file:
-        lay_down_config_dict = yaml.safe_load(file)
-    averaged_data = {
-        "start_timestamp": start_datetime.isoformat(),
-        "end_datetime": datetime.now().isoformat(),
-        "primaite_version": primaite.__version__,
-        "system_info": _get_system_info(),
-        "total_sessions": n,
-        "total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()),
-        "total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()),
-        "av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / n,
-        "av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / n,
-        "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) / n,
-        "combined_av_reward_per_episode": {},
-        "session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()},
-        "training_config": training_config_dict,
-        "lay_down_config": lay_down_config_dict,
-    }
-
-    episodes = metadata_dict[1]["av_reward_per_episode"].keys()
-
-    for episode in episodes:
-        combined_av_reward = sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / n
-        averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward
-
-    return averaged_data
-
-
-def _get_df_from_episode_av_reward_dict(data: Dict):
-    data: Dict = {"episode": data.keys(), "av_reward": data.values()}
-
-    return (
-        pl.from_dict(data)
-        .with_columns(rolling_mean=pl.col("av_reward").rolling_mean(window_size=25))
-        .rename({"rolling_mean": "rolling_av_reward"})
-    )
-
-
-def _plot_benchmark_metadata(
-    benchmark_metadata_dict: Dict,
-    title: Optional[str] = None,
-    subtitle: Optional[str] = None,
-) -> Figure:
-    if title:
-        if subtitle:
-            title = f"{title} <br>{subtitle}</sup>"
-    else:
-        if subtitle:
-            title = subtitle
-
-    config = get_plotly_config()
-    layout = go.Layout(
-        autosize=config["size"]["auto_size"],
-        width=config["size"]["width"],
-        height=config["size"]["height"],
-    )
-    # Create the line graph with a colored line
-    fig = go.Figure(layout=layout)
-    fig.update_layout(template=config["template"])
-
-    for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items():
-        df = _get_df_from_episode_av_reward_dict(av_reward_dict)
-        fig.add_trace(
-            go.Scatter(
-                x=df["episode"],
-                y=df["av_reward"],
-                mode="lines",
-                name=f"Session {session}",
-                opacity=0.25,
-                line={"color": "#a6a6a6"},
-            )
-        )
-
-    df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"])
-    fig.add_trace(
-        go.Scatter(
-            x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"}
-        )
-    )
-
-    fig.add_trace(
-        go.Scatter(
-            x=df["episode"],
-            y=df["rolling_av_reward"],
-            mode="lines",
-            name="Rolling Av (Combined Session Av)",
-            line={"color": "#4CBB17"},
-        )
-    )
-
-    # Set the layout of the graph
-    fig.update_layout(
-        xaxis={
-            "title": "Episode",
-            "type": "linear",
-        },
-        yaxis={"title": "Average Reward"},
-        title=title,
-    )
-
-    return fig
-
-
-def _plot_all_benchmarks_combined_session_av():
+def _get_benchmark_primaite_environment() -> BenchmarkPrimaiteGymEnv:
    """
-    Plot the Benchmark results for each released version of PrimAITE.
+    Create an instance of the BenchmarkPrimaiteGymEnv.

-    Does this by iterating over the ``benchmark/results`` directory and
-    extracting the benchmark metadata json for each version that has been
-    benchmarked. The combined_av_reward_per_episode is extracted from each,
-    converted into a polars dataframe, and plotted as a scatter line in plotly.
+    This environment will be used to train the agents on.
+
+    :return: A new BenchmarkPrimaiteGymEnv configured from ``data_manipulation_config_path()``.
    """
-    title = "PrimAITE Versions Learning Benchmark"
-    subtitle = "Rolling Av (Combined Session Av)"
-    if title:
-        if subtitle:
-            title = f"{title} <br>{subtitle}</sup>"
-    else:
-        if subtitle:
-            title = subtitle
-    config = get_plotly_config()
-    layout = go.Layout(
-        autosize=config["size"]["auto_size"],
-        width=config["size"]["width"],
-        height=config["size"]["height"],
-    )
-    # Create the line graph with a colored line
-    fig = go.Figure(layout=layout)
-    fig.update_layout(template=config["template"])
-
-    for dir in _RESULTS_ROOT.iterdir():
-        if dir.is_dir():
-            metadata_file = dir / f"{dir.name}_benchmark_metadata.json"
-            with open(metadata_file, "r") as file:
-                metadata_dict = json.load(file)
-            df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"])
-
-            fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name))
-
-    # Set the layout of the graph
-    fig.update_layout(
-        xaxis={
-            "title": "Episode",
-            "type": "linear",
-        },
-        yaxis={"title": "Average Reward"},
-        title=title,
-    )
-    fig["data"][0]["showlegend"] = True
-
-    return fig
+    env = BenchmarkPrimaiteGymEnv(env_config=data_manipulation_config_path())
+    return env


-def run():
+def _prepare_session_directory():
+    """
+    Point PrimAITE's user sessions path at a fresh ``sessions`` directory under the benchmark root.
+
+    Any directory left over from a previous run is deleted first, so everything written during this
+    benchmark ends up in one known location that is easy to clean up afterwards.
+    """
+    sessions_dir = _BENCHMARK_ROOT / "sessions"
+
+    # wipe whatever an earlier benchmark run left behind
+    if sessions_dir.is_dir():
+        shutil.rmtree(sessions_dir)
+
+    # redirect PrimAITE's session output, then create the directory through the redirected attribute
+    paths = primaite.PRIMAITE_PATHS
+    paths.user_sessions_path = sessions_dir
+    paths.user_sessions_path.mkdir(exist_ok=True, parents=True)
+
+
+def run(
+    number_of_sessions: int = 5,
+    num_episodes: int = 1000,
+    episode_len: int = 128,
+    n_steps: int = 1280,
+    batch_size: int = 32,
+    learning_rate: float = 3e-4,
+) -> None:
    """Run the PrimAITE benchmark."""
-    start_datetime = datetime.now()
-    av_reward_per_episode_dicts = {}
-    for i in range(1, 11):
-        print(f"Starting Benchmark Session: {i}")
-        with _get_benchmark_primaite_session() as session:
-            session.learn()
-            av_reward_per_episode_dicts[i] = session.learn_metadata_dict()
-
-    benchmark_metadata = _build_benchmark_results_dict(
-        start_datetime=start_datetime, metadata_dict=av_reward_per_episode_dicts
-    )
+    # number_of_sessions: how many independent training sessions are run and then aggregated in the report.
+    # num_episodes, episode_len, n_steps, batch_size, learning_rate: passed unchanged to every BenchmarkSession.
+
+    # generate report folder
    v_str = f"v{primaite.__version__}"

    version_result_dir = _RESULTS_ROOT / v_str
-    if version_result_dir.exists():
-        shutil.rmtree(version_result_dir)
    version_result_dir.mkdir(exist_ok=True, parents=True)
+    output_path = version_result_dir / f"PrimAITE {v_str} Benchmark Report.md"

-    with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file:
-        json.dump(benchmark_metadata, file, indent=4)
-    title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark"
-    fig = _plot_benchmark_metadata(benchmark_metadata, title=title)
-    this_version_plot_path = version_result_dir / f"{title}.png"
-    fig.write_image(this_version_plot_path)
+    benchmark_start_time = datetime.now()

-    fig = _plot_all_benchmarks_combined_session_av()
+    session_metadata_dict = {}

-    all_version_plot_path = _RESULTS_ROOT / "PrimAITE Versions Learning Benchmark.png"
-    fig.write_image(all_version_plot_path)
+    _prepare_session_directory()

-    _build_benchmark_latex_report(benchmark_metadata, this_version_plot_path, all_version_plot_path)
+    # run training
+    for i in range(1, number_of_sessions + 1):
+        print(f"Starting Benchmark Session: {i}")
+
+        # a fresh environment is created for every session
+        with _get_benchmark_primaite_environment() as gym_env:
+            session = BenchmarkSession(
+                gym_env=gym_env,
+                num_episodes=num_episodes,
+                n_steps=n_steps,
+                episode_len=episode_len,
+                batch_size=batch_size,
+                learning_rate=learning_rate,
+            )
+            session.train()
+
+            # Dump the session metadata so that we're not holding it in memory as it's large
+            with open(_SESSION_METADATA_ROOT / f"{i}.json", "w") as file:
+                json.dump(session.session_metadata, file, indent=4)
+
+    # read every session's metadata back from disk, keyed by its 1-based session number
+    for i in range(1, number_of_sessions + 1):
+        with open(_SESSION_METADATA_ROOT / f"{i}.json", "r") as file:
+            session_metadata_dict[i] = json.load(file)
+    # generate report
+    build_benchmark_md_report(
+        benchmark_start_time=benchmark_start_time,
+        session_metadata=session_metadata_dict,
+        config_path=data_manipulation_config_path(),
+        results_root_path=_RESULTS_ROOT,
+        output_path=output_path,
+    )
+    # NOTE(review): "static/styles.css" is a relative path - presumably resolved against the current working
+    # directory, so this would only work when run from the benchmark directory; confirm.
+    md2pdf(
+        md_path=output_path,
+        pdf_path=str(output_path).replace(".md", ".pdf"),
+        css_path="static/styles.css",
+    )


 if __name__ == "__main__":
--- a/benchmark/report.py
+++ b/benchmark/report.py
@@ -0,0 +1,420 @@
+# © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+import json
+import sys
+from datetime import datetime
+from os import PathLike
+from pathlib import Path
+from typing import Dict, Optional
+
+import plotly.graph_objects as go
+import polars as pl
+import yaml
+from plotly.graph_objs import Figure
+from utils import _get_system_info
+
+import primaite
+
+# Layout settings shared by every plot in this module: fixed figure size and the Plotly template.
+# "range_slider" is not read by any of the plotting functions in this module.
+PLOT_CONFIG = {
+    "size": {"auto_size": False, "width": 800, "height": 640},
+    "template": "plotly_white",
+    "range_slider": False,
+}
+
+
+def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict, config: Dict) -> dict:
+    """
+    Construct a dictionary aggregating benchmark results from multiple sessions.
+
+    Episode and step counts are summed across sessions; durations and per-episode rewards are averaged
+    across sessions.
+
+    :param start_datetime: The datetime when the benchmarking started.
+    :param metadata_dict: Dictionary mapping a session identifier to that session's metadata. Every value
+        must provide ``total_episodes``, ``total_time_steps``, ``total_s``, ``s_per_step``,
+        ``s_per_100_steps_10_nodes`` and ``total_reward_per_episode`` (a mapping of episode to reward).
+    :param config: Configuration settings used during the benchmarking.
+    :return: A dictionary containing aggregated data and metadata from the benchmarking sessions.
+    :raises ValueError: If ``metadata_dict`` is empty, because no averages can be calculated.
+    """
+    if not metadata_dict:
+        raise ValueError("Cannot build benchmark results: no session metadata was provided.")
+
+    sessions = list(metadata_dict.values())
+    num_sessions = len(sessions)
+
+    def _session_mean(key: str) -> float:
+        """Average a numeric per-session field across all sessions."""
+        return sum(session[key] for session in sessions) / num_sessions
+
+    averaged_data = {
+        "start_timestamp": start_datetime.isoformat(),
+        "end_datetime": datetime.now().isoformat(),
+        "primaite_version": primaite.__version__,
+        "system_info": _get_system_info(),
+        "total_sessions": num_sessions,
+        "total_episodes": sum(session["total_episodes"] for session in sessions),
+        "total_time_steps": sum(session["total_time_steps"] for session in sessions),
+        "av_s_per_session": _session_mean("total_s"),
+        "av_s_per_step": _session_mean("s_per_step"),
+        "av_s_per_100_steps_10_nodes": _session_mean("s_per_100_steps_10_nodes"),
+        "combined_total_reward_per_episode": {},
+        "session_total_reward_per_episode": {k: v["total_reward_per_episode"] for k, v in metadata_dict.items()},
+        "config": config,
+    }
+
+    # Average each episode's reward across all sessions. The episode keys are taken from the first session,
+    # whatever its identifier is; every other session is expected to report the same episodes.
+    for episode in sessions[0]["total_reward_per_episode"].keys():
+        averaged_data["combined_total_reward_per_episode"][episode] = (
+            sum(session["total_reward_per_episode"][episode] for session in sessions) / num_sessions
+        )
+
+    return averaged_data
+
+
+def _get_df_from_episode_av_reward_dict(data: Dict) -> pl.DataFrame:
+    """
+    Build a Polars DataFrame from a mapping of episode to reward.
+
+    :param data: Dictionary with episodes as keys and rewards as values.
+    :return: Polars DataFrame with the columns ``episode``, ``av_reward`` and ``rolling_av_reward``, the last
+        being a rolling mean of ``av_reward`` over a window of 25 rows.
+    """
+    columns = {"episode": data.keys(), "av_reward": data.values()}
+    frame = pl.from_dict(columns)
+
+    # compute the rolling mean under a temporary name, then give it its final column name
+    frame = frame.with_columns(rolling_mean=pl.col("av_reward").rolling_mean(window_size=25))
+    return frame.rename({"rolling_mean": "rolling_av_reward"})
+
+
+def _plot_benchmark_metadata(
+    benchmark_metadata_dict: Dict,
+    title: Optional[str] = None,
+    subtitle: Optional[str] = None,
+) -> Figure:
+    """
+    Plot benchmark metadata as a line graph using Plotly.
+
+    One faint grey line is drawn per session, followed by the combined (cross-session) average and its
+    rolling average. Only the first session line is listed in the legend, labelled "Individual Sessions".
+
+    :param benchmark_metadata_dict: Dictionary containing ``session_total_reward_per_episode`` (episode
+        rewards for each session) and ``combined_total_reward_per_episode`` (rewards averaged over sessions).
+    :param title: Optional title for the graph.
+    :param subtitle: Optional subtitle for the graph; used as the title when no title is given.
+    :return: Plotly figure object representing the benchmark metadata plot.
+    """
+    if title and subtitle:
+        # The subtitle goes on its own line in a balanced <sup> element (the opening tag was missing before).
+        title = f"{title} <br><sup>{subtitle}</sup>"
+    elif subtitle:
+        title = subtitle
+
+    layout = go.Layout(
+        autosize=PLOT_CONFIG["size"]["auto_size"],
+        width=PLOT_CONFIG["size"]["width"],
+        height=PLOT_CONFIG["size"]["height"],
+    )
+    # Create the line graph with a colored line
+    fig = go.Figure(layout=layout)
+    fig.update_layout(template=PLOT_CONFIG["template"])
+
+    session_rewards = benchmark_metadata_dict["session_total_reward_per_episode"]
+    for index, (session, av_reward_dict) in enumerate(session_rewards.items()):
+        df = _get_df_from_episode_av_reward_dict(av_reward_dict)
+        # A single legend entry stands in for all session lines. Deciding this here, rather than renaming
+        # fig["data"][0] afterwards, means the combined trace is never relabelled when there are no sessions.
+        is_first_session = index == 0
+        fig.add_trace(
+            go.Scatter(
+                x=df["episode"],
+                y=df["av_reward"],
+                mode="lines",
+                name="Individual Sessions" if is_first_session else f"Session {session}",
+                showlegend=is_first_session,
+                opacity=0.25,
+                line={"color": "#a6a6a6"},
+            )
+        )
+
+    df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_total_reward_per_episode"])
+    fig.add_trace(
+        go.Scatter(
+            x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"}
+        )
+    )
+
+    fig.add_trace(
+        go.Scatter(
+            x=df["episode"],
+            y=df["rolling_av_reward"],
+            mode="lines",
+            name="Rolling Av (Combined Session Av)",
+            line={"color": "#4CBB17"},
+        )
+    )
+
+    # Set the layout of the graph
+    fig.update_layout(
+        xaxis={
+            "title": "Episode",
+            "type": "linear",
+        },
+        yaxis={"title": "Total Reward"},
+        title=title,
+    )
+    # Place a semi-transparent legend inside the plot area, in the top-left corner.
+    fig.update_layout(
+        legend=dict(
+            yanchor="top",
+            y=0.99,
+            xanchor="left",
+            x=0.01,
+            bgcolor="rgba(255,255,255,0.3)",
+        )
+    )
+
+    return fig
+
+
+def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
+    """
+    Plot the Benchmark results for each released version of PrimAITE.
+
+    Does this by iterating over the sub-directories of ``results_directory`` and
+    reading ``<directory name>_benchmark_metadata.json`` from each one. The
+    combined_total_reward_per_episode is extracted from each, converted into a
+    polars dataframe, and its rolling average plotted as a scatter line in plotly.
+
+    :param results_directory: Directory containing one sub-directory per benchmarked version.
+    :return: Plotly figure with one line per version directory.
+    :raises FileNotFoundError: If a sub-directory does not contain its benchmark metadata json.
+    """
+    major_v = primaite.__version__.split(".")[0]
+    title = f"Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}"
+    subtitle = "Rolling Av (Combined Session Av)"
+    # Title and subtitle are always set here; the subtitle goes on its own line in a balanced <sup> element.
+    title = f"{title} <br><sup>{subtitle}</sup>"
+
+    layout = go.Layout(
+        autosize=PLOT_CONFIG["size"]["auto_size"],
+        width=PLOT_CONFIG["size"]["width"],
+        height=PLOT_CONFIG["size"]["height"],
+    )
+    # Create the line graph with a colored line
+    fig = go.Figure(layout=layout)
+    fig.update_layout(template=PLOT_CONFIG["template"])
+
+    # Sorted so that trace and legend order do not depend on the file system's listing order.
+    for version_dir in sorted(results_directory.iterdir()):
+        if not version_dir.is_dir():
+            continue
+        metadata_file = version_dir / f"{version_dir.name}_benchmark_metadata.json"
+        with open(metadata_file, "r") as file:
+            metadata_dict = json.load(file)
+        df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_total_reward_per_episode"])
+
+        fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=version_dir.name))
+
+    # Set the layout of the graph
+    fig.update_layout(
+        xaxis={
+            "title": "Episode",
+            "type": "linear",
+        },
+        yaxis={"title": "Total Reward"},
+        title=title,
+    )
+    # Force a legend entry for the first trace; guarded because there may be no version directories at all.
+    if fig["data"]:
+        fig["data"][0]["showlegend"] = True
+    fig.update_layout(legend=dict(yanchor="top", y=-0.2, xanchor="left", x=0.01, orientation="h"))
+
+    return fig
+
+
+def _get_performance_benchmark_for_all_version_dict(results_directory: Path) -> Dict[str, float]:
+    """
+    Collect the performance benchmark of every version found in a results directory.
+
+    Each sub-directory of ``results_directory`` is expected to hold a file named
+    ``<directory name>_benchmark_metadata.json``. Its ``primaite_version`` becomes the key and its
+    ``av_s_per_100_steps_10_nodes`` the value of the returned dictionary. Entries that are not
+    directories are ignored.
+
+    :param results_directory: The directory containing subdirectories for each version's benchmark data.
+    :return: A dictionary with version numbers as keys and their corresponding average performance benchmark
+        (average time per 100 steps on 10 nodes) as values.
+    """
+    benchmarks_by_version = {}
+    for entry in results_directory.iterdir():
+        if not entry.is_dir():
+            continue
+        with open(entry / f"{entry.name}_benchmark_metadata.json", "r") as handle:
+            metadata = json.load(handle)
+        benchmarks_by_version[metadata["primaite_version"]] = metadata["av_s_per_100_steps_10_nodes"]
+    return benchmarks_by_version
+
+
+def _version_sort_key(version: str) -> tuple:
+    """Sort key that orders dotted version strings numerically, so that "3.10.0" follows "3.2.0"."""
+    key = []
+    for part in version.split("."):
+        leading_digits = ""
+        for char in part:
+            if not char.isdigit():
+                break
+            leading_digits += char
+        # The raw text is kept as a tie-breaker so parts such as "0" and "0b1" still order deterministically.
+        key.append((int(leading_digits) if leading_digits else -1, part))
+    return tuple(key)
+
+
+def _plot_av_s_per_100_steps_10_nodes(
+    version_times_dict: Dict[str, float],
+) -> Figure:
+    """
+    Create a bar chart visualising the performance of each version of PrimAITE.
+
+    Performance is based on the average training time per 100 steps on 10 nodes. Bars are ordered by
+    version number, compared numerically component by component rather than as plain text.
+
+    :param version_times_dict: A dictionary with software versions as keys and average times as values.
+    :return: A Plotly figure object representing the bar chart of the performance metrics.
+    """
+    major_v = primaite.__version__.split(".")[0]
+    title = f"Performance of Minor and Bugfix Releases for Major Version {major_v}"
+    subtitle = "Average Training Time per 100 Steps on 10 Nodes "
+    title = f"{title} <br><sub>{subtitle}</sub>"
+
+    layout = go.Layout(
+        autosize=PLOT_CONFIG["size"]["auto_size"],
+        width=PLOT_CONFIG["size"]["width"],
+        height=PLOT_CONFIG["size"]["height"],
+    )
+    fig = go.Figure(layout=layout)
+    fig.update_layout(template=PLOT_CONFIG["template"])
+
+    # A plain string sort would place "3.10.0" before "3.2.0".
+    versions = sorted(version_times_dict, key=_version_sort_key)
+    times = [version_times_dict[version] for version in versions]
+
+    fig.add_trace(go.Bar(x=versions, y=times, text=times, textposition="auto", texttemplate="%{y:.3f}"))
+
+    fig.update_layout(
+        xaxis_title="PrimAITE Version",
+        yaxis_title="Avg Time per 100 Steps on 10 Nodes (seconds)",
+        title=title,
+    )
+
+    return fig
+
+
+def build_benchmark_md_report(
+    benchmark_start_time: datetime,
+    session_metadata: Dict,
+    config_path: Path,
+    results_root_path: Path,
+    output_path: PathLike,
+) -> None:
+    """
+    Generate a Markdown report for a benchmarking run, documenting performance metrics and graphs.
+
+    The aggregated benchmark metadata json and three plots (this version's learning benchmark, the learning
+    benchmark of all versions, and the performance of all versions) are written to the version's results
+    directory, ``results_root_path / "v<primaite version>"``. The Markdown report is then written to
+    ``output_path``. The report references the plots by file name only, so the images resolve only when the
+    report is located in the same directory as the plots.
+
+    :param benchmark_start_time: The datetime object representing when the benchmarking process was initiated.
+    :param session_metadata: A dictionary containing metadata for each benchmarking session.
+    :param config_path: A pathlib.Path object pointing to the configuration file used for the benchmark sessions.
+    :param results_root_path: A pathlib.Path object pointing to the directory where the results and graphs should be
+        saved.
+    :param output_path: Path of the Markdown file to write.
+    """
+    # generate report folder
+    v_str = f"v{primaite.__version__}"
+
+    version_result_dir = results_root_path / v_str
+    version_result_dir.mkdir(exist_ok=True, parents=True)
+
+    # load the config file as dict
+    with open(config_path, "r") as f:
+        cfg_data = yaml.safe_load(f)
+
+    # generate the benchmark metadata dict
+    benchmark_metadata_dict = _build_benchmark_results_dict(
+        start_datetime=benchmark_start_time, metadata_dict=session_metadata, config=cfg_data
+    )
+    major_v = primaite.__version__.split(".")[0]
+    # The metadata json must be written before the all-version plots are built: they read it back from disk.
+    with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file:
+        json.dump(benchmark_metadata_dict, file, indent=4)
+    title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark"
+    fig = _plot_benchmark_metadata(benchmark_metadata_dict, title=title)
+    this_version_plot_path = version_result_dir / f"{title}.png"
+    fig.write_image(this_version_plot_path)
+
+    fig = _plot_all_benchmarks_combined_session_av(results_directory=results_root_path)
+
+    filename = f"PrimAITE Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}.png"
+
+    all_version_plot_path = version_result_dir / filename
+    fig.write_image(all_version_plot_path)
+
+    performance_benchmark_dict = _get_performance_benchmark_for_all_version_dict(results_directory=results_root_path)
+    fig = _plot_av_s_per_100_steps_10_nodes(performance_benchmark_dict)
+    filename = f"PrimAITE Performance of Minor and Bugfix Releases for Major Version {major_v}.png"
+    performance_benchmark_plot_path = version_result_dir / filename
+    fig.write_image(performance_benchmark_plot_path)
+
+    data = benchmark_metadata_dict
+    primaite_version = data["primaite_version"]
+
+    with open(output_path, "w") as file:
+        # Title
+        file.write(f"# PrimAITE v{primaite_version} Learning Benchmark\n")
+        file.write("## PrimAITE Dev Team\n")
+        file.write(f"### {datetime.now().date()}\n")
+        file.write("\n---\n")
+
+        sessions = data["total_sessions"]
+        # Episode count of the first session, whatever its key is; one is subtracted as in the original report.
+        first_session = next(iter(session_metadata.values()))
+        episodes = first_session["total_episodes"] - 1
+        steps = data["config"]["game"]["max_episode_length"]
+        session_noun = "session" if sessions == 1 else "sessions"
+
+        # Body
+        file.write("## 1 Introduction\n")
+        file.write(
+            f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics "
+            f"were captured to be referenced during system-level testing and user acceptance testing (UAT).\n"
+        )
+        file.write(
+            f"The benchmarking process consists of running {sessions} training {session_noun} using the same "
+            f"config file. Each session trains an agent for {episodes} episodes, "
+            f"with each episode consisting of {steps} steps.\n"
+        )
+        file.write(
+            f"The total reward per episode from each session is captured. This is then used to calculate an "
+            f"average total reward per episode from the {sessions} individual sessions for smoothing. "
+            f"Finally, a 25-window rolling average of the average total reward per session is calculated for "
+            f"further smoothing.\n"
+        )
+
+        file.write("## 2 System Information\n")
+        subsection = 1
+        file.write(f"### 2.{subsection} Python\n")
+        file.write(f"**Version:** {sys.version}\n")
+
+        for section, section_data in data["system_info"].items():
+            if not section_data:
+                continue
+            # Only number the sections that are actually written, so the numbering has no gaps.
+            subsection += 1
+            file.write(f"### 2.{subsection} {section}\n")
+            if isinstance(section_data, dict):
+                for key, value in section_data.items():
+                    file.write(f"- **{key}:** {value}\n")
+            elif isinstance(section_data, list):
+                # List-valued sections get one bullet per item instead of a heading with no content.
+                for item in section_data:
+                    if isinstance(item, dict):
+                        details = ", ".join(f"**{key}:** {value}" for key, value in item.items())
+                    else:
+                        details = str(item)
+                    file.write(f"- {details}\n")
+
+        headers_map = {
+            "total_sessions": "Total Sessions",
+            "total_episodes": "Total Episodes",
+            "total_time_steps": "Total Steps",
+            "av_s_per_session": "Av Session Duration (s)",
+            "av_s_per_step": "Av Step Duration (s)",
+            "av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
+        }
+
+        file.write("## 3 Stats\n")
+        for section, header in headers_map.items():
+            if section.startswith("av_"):
+                # averages are floats; show them to four decimal places
+                file.write(f"- **{header}:** {data[section]:.4f}\n")
+            else:
+                file.write(f"- **{header}:** {data[section]}\n")
+
+        file.write("## 4 Graphs\n")
+
+        file.write(f"### 4.1 v{primaite_version} Learning Benchmark Plot\n")
+        file.write(f"![PrimAITE {primaite_version} Learning Benchmark Plot]({this_version_plot_path.name})\n")
+
+        file.write(f"### 4.2 Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}\n")
+        file.write(
+            f"![Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}]"
+            f"({all_version_plot_path.name})\n"
+        )
+
+        file.write(f"### 4.3 Performance of Minor and Bugfix Releases for Major Version {major_v}\n")
+        file.write(
+            f"![Performance of Minor and Bugfix Releases for Major Version {major_v}]"
+            f"({performance_benchmark_plot_path.name})\n"
+        )
+
+
+def md2pdf(md_path: PathLike, pdf_path: PathLike, css_path: PathLike) -> None:
+    """
+    Generate PDF version of Markdown report.
+
+    :param md_path: Path of the Markdown file to convert.
+    :param pdf_path: Path of the PDF file to write.
+    :param css_path: Path of the stylesheet passed to the converter.
+    """
+    # Imported inside the function and under an alias: the library function shares this wrapper's name.
+    from md2pdf.core import md2pdf as _render_pdf
+
+    # the Markdown file's own directory is handed to the converter as its base URL
+    report_dir = Path(md_path).parent
+    _render_pdf(pdf_file_path=pdf_path, md_file_path=md_path, base_url=report_dir, css_file_path=css_path)
--- a/benchmark/results/v2/PrimAITE
+++ b/benchmark/results/v2/PrimAITE
--- a/benchmark/results/v2/v2.0.0/PrimAITE
+++ b/benchmark/results/v2/v2.0.0/PrimAITE
--- a/benchmark/results/v2/v2.0.0/PrimAITE
+++ b/benchmark/results/v2/v2.0.0/PrimAITE
--- a/benchmark/results/v2/v2.0.0/v2.0.0_benchmark_metadata.json
+++ b/benchmark/results/v2/v2.0.0/v2.0.0_benchmark_metadata.json
--- a/benchmark/results/v3/PrimAITE
+++ b/benchmark/results/v3/PrimAITE
--- a/benchmark/results/v3/v3.0.0/PrimAITE
+++ b/benchmark/results/v3/v3.0.0/PrimAITE
--- a/benchmark/results/v3/v3.0.0/PrimAITE
+++ b/benchmark/results/v3/v3.0.0/PrimAITE
--- a/benchmark/results/v3/v3.0.0/session_metadata/1.json
+++ b/benchmark/results/v3/v3.0.0/session_metadata/1.json
--- a/benchmark/results/v3/v3.0.0/session_metadata/2.json
+++ b/benchmark/results/v3/v3.0.0/session_metadata/2.json
--- a/benchmark/results/v3/v3.0.0/session_metadata/3.json
+++ b/benchmark/results/v3/v3.0.0/session_metadata/3.json
--- a/benchmark/results/v3/v3.0.0/session_metadata/4.json
+++ b/benchmark/results/v3/v3.0.0/session_metadata/4.json
--- a/benchmark/results/v3/v3.0.0/session_metadata/5.json
+++ b/benchmark/results/v3/v3.0.0/session_metadata/5.json
--- a/benchmark/results/v3/v3.0.0/v3.0.0_benchmark_metadata.json
+++ b/benchmark/results/v3/v3.0.0/v3.0.0_benchmark_metadata.json
--- a/benchmark/results/v3/v3.1.0/PrimAITE
+++ b/benchmark/results/v3/v3.1.0/PrimAITE
--- a/benchmark/results/v3/v3.1.0/PrimAITE
+++ b/benchmark/results/v3/v3.1.0/PrimAITE
--- a/benchmark/results/v3/v3.1.0/PrimAITE
+++ b/benchmark/results/v3/v3.1.0/PrimAITE
@@ -0,0 +1,38 @@
+# PrimAITE v3.1.0 Learning Benchmark
+## PrimAITE Dev Team
+### 2024-07-20
+
+---
+## 1 Introduction
+PrimAITE v3.1.0 was benchmarked automatically upon release. Learning rate metrics were captured to be referenced during system-level testing and user acceptance testing (UAT).
+The benchmarking process consists of running 5 training sessions using the same config file. Each session trains an agent for 1000 episodes, with each episode consisting of 128 steps.
+The total reward per episode from each session is captured. This is then used to calculate an average total reward per episode from the 5 individual sessions for smoothing. Finally, a 25-window rolling average of the average total reward per session is calculated for further smoothing.
+## 2 System Information
+### 2.1 Python
+**Version:** 3.10.14 (main, Apr  6 2024, 18:45:05) [GCC 9.4.0]
+### 2.2 System
+- **OS:** Linux
+- **OS Version:** #76~20.04.1-Ubuntu SMP Thu Jun 13 18:00:23 UTC 2024
+- **Machine:** x86_64
+- **Processor:** x86_64
+### 2.3 CPU
+- **Physical Cores:** 2
+- **Total Cores:** 4
+- **Max Frequency:** 0.00Mhz
+### 2.4 Memory
+- **Total:** 15.62GB
+- **Swap Total:** 0.00B
+## 3 Stats
+- **Total Sessions:** 5
+- **Total Episodes:** 5005
+- **Total Steps:** 640000
+- **Av Session Duration (s):** 1632.8888
+- **Av Step Duration (s):** 0.0510
+- **Av Duration per 100 Steps per 10 Nodes (s):** 5.1028
+## 4 Graphs
+### 4.1 v3.1.0 Learning Benchmark Plot
+![PrimAITE 3.1.0 Learning Benchmark Plot](PrimAITE v3.1.0 Learning Benchmark.png)
+### 4.2 Learning Benchmark of Minor and Bugfix Releases for Major Version 3
+![Learning Benchmark of Minor and Bugfix Releases for Major Version 3](PrimAITE Learning Benchmark of Minor and Bugfix Releases for Major Version 3.png)
+### 4.3 Performance of Minor and Bugfix Releases for Major Version 3
+![Performance of Minor and Bugfix Releases for Major Version 3](PrimAITE Performance of Minor and Bugfix Releases for Major Version 3.png)
--- a/benchmark/results/v3/v3.1.0/PrimAITE
+++ b/benchmark/results/v3/v3.1.0/PrimAITE
--- a/benchmark/results/v3/v3.1.0/session_metadata/1.json
+++ b/benchmark/results/v3/v3.1.0/session_metadata/1.json
--- a/benchmark/results/v3/v3.1.0/session_metadata/2.json
+++ b/benchmark/results/v3/v3.1.0/session_metadata/2.json
--- a/benchmark/results/v3/v3.1.0/session_metadata/3.json
+++ b/benchmark/results/v3/v3.1.0/session_metadata/3.json
--- a/benchmark/results/v3/v3.1.0/session_metadata/4.json
+++ b/benchmark/results/v3/v3.1.0/session_metadata/4.json
--- a/benchmark/results/v3/v3.1.0/session_metadata/5.json
+++ b/benchmark/results/v3/v3.1.0/session_metadata/5.json
--- a/benchmark/results/v3/v3.1.0/v3.1.0_benchmark_metadata.json
+++ b/benchmark/results/v3/v3.1.0/v3.1.0_benchmark_metadata.json
--- a/benchmark/results/v3/v3.2.0/PrimAITE
+++ b/benchmark/results/v3/v3.2.0/PrimAITE
--- a/benchmark/results/v3/v3.2.0/PrimAITE
+++ b/benchmark/results/v3/v3.2.0/PrimAITE
--- a/benchmark/results/v3/v3.2.0/PrimAITE
+++ b/benchmark/results/v3/v3.2.0/PrimAITE
@@ -0,0 +1,38 @@
+# PrimAITE v3.2.0 Learning Benchmark
+## PrimAITE Dev Team
+### 2024-07-21
+
+---
+## 1 Introduction
+PrimAITE v3.2.0 was benchmarked automatically upon release. Learning rate metrics were captured to be referenced during system-level testing and user acceptance testing (UAT).
+The benchmarking process consists of running 5 training sessions using the same config file. Each session trains an agent for 1000 episodes, with each episode consisting of 128 steps.
+The total reward per episode from each session is captured. This is then used to calculate an average total reward per episode from the 5 individual sessions for smoothing. Finally, a 25-window rolling average of the average total reward per session is calculated for further smoothing.
+## 2 System Information
+### 2.1 Python
+**Version:** 3.10.14 (main, Apr  6 2024, 18:45:05) [GCC 9.4.0]
+### 2.2 System
+- **OS:** Linux
+- **OS Version:** #76~20.04.1-Ubuntu SMP Thu Jun 13 18:00:23 UTC 2024
+- **Machine:** x86_64
+- **Processor:** x86_64
+### 2.3 CPU
+- **Physical Cores:** 2
+- **Total Cores:** 4
+- **Max Frequency:** 0.00Mhz
+### 2.4 Memory
+- **Total:** 15.62GB
+- **Swap Total:** 0.00B
+## 3 Stats
+- **Total Sessions:** 5
+- **Total Episodes:** 5005
+- **Total Steps:** 640000
+- **Av Session Duration (s):** 1691.5034
+- **Av Step Duration (s):** 0.0529
+- **Av Duration per 100 Steps per 10 Nodes (s):** 5.2859
+## 4 Graphs
+### 4.1 v3.2.0 Learning Benchmark Plot
+![PrimAITE 3.2.0 Learning Benchmark Plot](PrimAITE v3.2.0 Learning Benchmark.png)
+### 4.2 Learning Benchmark of Minor and Bugfix Releases for Major Version 3
+![Learning Benchmark of Minor and Bugfix Releases for Major Version 3](PrimAITE Learning Benchmark of Minor and Bugfix Releases for Major Version 3.png)
+### 4.3 Performance of Minor and Bugfix Releases for Major Version 3
+![Performance of Minor and Bugfix Releases for Major Version 3](PrimAITE Performance of Minor and Bugfix Releases for Major Version 3.png)
--- a/benchmark/results/v3/v3.2.0/PrimAITE
+++ b/benchmark/results/v3/v3.2.0/PrimAITE
--- a/benchmark/results/v3/v3.2.0/session_metadata/1.json
+++ b/benchmark/results/v3/v3.2.0/session_metadata/1.json
--- a/benchmark/results/v3/v3.2.0/session_metadata/2.json
+++ b/benchmark/results/v3/v3.2.0/session_metadata/2.json
--- a/benchmark/results/v3/v3.2.0/session_metadata/3.json
+++ b/benchmark/results/v3/v3.2.0/session_metadata/3.json
--- a/benchmark/results/v3/v3.2.0/session_metadata/4.json
+++ b/benchmark/results/v3/v3.2.0/session_metadata/4.json
--- a/benchmark/results/v3/v3.2.0/session_metadata/5.json
+++ b/benchmark/results/v3/v3.2.0/session_metadata/5.json
--- a/benchmark/results/v3/v3.2.0/v3.2.0_benchmark_metadata.json
+++ b/benchmark/results/v3/v3.2.0/v3.2.0_benchmark_metadata.json
--- a/benchmark/results/v3/v3.3.0/PrimAITE
+++ b/benchmark/results/v3/v3.3.0/PrimAITE
--- a/benchmark/results/v3/v3.3.0/PrimAITE
+++ b/benchmark/results/v3/v3.3.0/PrimAITE
--- a/benchmark/results/v3/v3.3.0/PrimAITE
+++ b/benchmark/results/v3/v3.3.0/PrimAITE
@@ -0,0 +1,38 @@
+# PrimAITE v3.3.0 Learning Benchmark
+## PrimAITE Dev Team
+### 2024-09-02
+
+---
+## 1 Introduction
+PrimAITE v3.3.0 was benchmarked automatically upon release. Learning rate metrics were captured to be referenced during system-level testing and user acceptance testing (UAT).
+The benchmarking process consists of running 5 training sessions using the same config file. Each session trains an agent for 1000 episodes, with each episode consisting of 128 steps.
+The total reward per episode from each session is captured. This is then used to calculate an average total reward per episode from the 5 individual sessions for smoothing. Finally, a 25-window rolling average of the average total reward per session is calculated for further smoothing.
+## 2 System Information
+### 2.1 Python
+**Version:** 3.10.14 (main, Apr  6 2024, 18:45:05) [GCC 9.4.0]
+### 2.2 System
+- **OS:** Linux
+- **OS Version:** #76~20.04.1-Ubuntu SMP Thu Jun 13 18:00:23 UTC 2024
+- **Machine:** x86_64
+- **Processor:** x86_64
+### 2.3 CPU
+- **Physical Cores:** 2
+- **Total Cores:** 4
+- **Max Frequency:** 0.00Mhz
+### 2.4 Memory
+- **Total:** 15.62GB
+- **Swap Total:** 0.00B
+## 3 Stats
+- **Total Sessions:** 5
+- **Total Episodes:** 5005
+- **Total Steps:** 640000
+- **Av Session Duration (s):** 1458.2831
+- **Av Step Duration (s):** 0.0456
+- **Av Duration per 100 Steps per 10 Nodes (s):** 4.5571
+## 4 Graphs
+### 4.1 v3.3.0 Learning Benchmark Plot
+![PrimAITE 3.3.0 Learning Benchmark Plot](PrimAITE v3.3.0 Learning Benchmark.png)
+### 4.2 Learning Benchmark of Minor and Bugfix Releases for Major Version 3
+![Learning Benchmark of Minor and Bugfix Releases for Major Version 3](PrimAITE Learning Benchmark of Minor and Bugfix Releases for Major Version 3.png)
+### 4.3 Performance of Minor and Bugfix Releases for Major Version 3
+![Performance of Minor and Bugfix Releases for Major Version 3](PrimAITE Performance of Minor and Bugfix Releases for Major Version 3.png)
--- a/benchmark/results/v3/v3.3.0/PrimAITE
+++ b/benchmark/results/v3/v3.3.0/PrimAITE
--- a/benchmark/results/v3/v3.3.0/PrimAITE
+++ b/benchmark/results/v3/v3.3.0/PrimAITE
--- a/benchmark/results/v3/v3.3.0/session_metadata/1.json
+++ b/benchmark/results/v3/v3.3.0/session_metadata/1.json
--- a/benchmark/results/v3/v3.3.0/session_metadata/2.json
+++ b/benchmark/results/v3/v3.3.0/session_metadata/2.json
--- a/benchmark/results/v3/v3.3.0/session_metadata/3.json
+++ b/benchmark/results/v3/v3.3.0/session_metadata/3.json
--- a/benchmark/results/v3/v3.3.0/session_metadata/4.json
+++ b/benchmark/results/v3/v3.3.0/session_metadata/4.json
--- a/benchmark/results/v3/v3.3.0/session_metadata/5.json
+++ b/benchmark/results/v3/v3.3.0/session_metadata/5.json
--- a/benchmark/results/v3/v3.3.0/v3.3.0_benchmark_metadata.json
+++ b/benchmark/results/v3/v3.3.0/v3.3.0_benchmark_metadata.json
--- a/benchmark/static/styles.css
+++ b/benchmark/static/styles.css
@@ -0,0 +1,34 @@
+/*
+ * Stylesheet under benchmark/static.
+ * NOTE(review): presumably the css_path given to the Markdown-to-PDF report step; confirm against the caller.
+ */
+
+/* Page-wide font and line spacing; the page margin override is commented out. */
+body {
+    font-family: 'Arial', sans-serif;
+    line-height: 1.6;
+    /* margin: 1cm; */
+}
+/* All heading levels are bold; the heading margin override is commented out. */
+h1, h2, h3, h4, h5, h6 {
+    font-weight: bold;
+    /* margin: 1em 0; */
+}
+/* No active declarations: the paragraph margin override is commented out. */
+p {
+    /* margin: 0.5em 0; */
+}
+/* Vertical spacing and left indent for bulleted and numbered lists. */
+ul, ol {
+    margin: 1em 0;
+    padding-left: 1.5em;
+}
+/* Preformatted blocks: light grey background, padding, horizontal scrolling on overflow. */
+pre {
+    background: #f4f4f4;
+    padding: 0.5em;
+    overflow-x: auto;
+}
+/* Images never exceed the width of their container and keep their aspect ratio. */
+img {
+    max-width: 100%;
+    height: auto;
+}
+/* Full-width tables with adjacent cell borders merged. */
+table {
+    width: 100%;
+    border-collapse: collapse;
+    margin: 1em 0;
+}
+/* Padded header and data cells with a thin light grey border. */
+th, td {
+    padding: 0.5em;
+    border: 1px solid #ddd;
+}
--- a/benchmark/utils.py
+++ b/benchmark/utils.py
@@ -0,0 +1,47 @@
+# © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+import platform
+from typing import Dict
+
+import psutil
+from GPUtil import GPUtil
+
+
+def get_size(size_bytes: int) -> str:
+    """
+    Scale a byte count to a human-readable string using 1024-based units.
+
+    e.g:
+        1253656 => '1.20MB'
+        1253656678 => '1.17GB'
+
+    :param size_bytes: The size in bytes.
+    :return: The size to two decimal places followed by its unit ('B', 'KB', 'MB', 'GB', 'TB', 'PB' or 'EB').
+    """
+    factor = 1024
+    size = size_bytes
+    for unit in ["", "K", "M", "G", "T", "P"]:
+        if size < factor:
+            return f"{size:.2f}{unit}B"
+        size /= factor
+    # Sizes of 1024 PB or more used to fall off the end of the loop and return None.
+    # By this point the value has been divided down to exabytes, so report it as such.
+    return f"{size:.2f}EB"
+
+
+def _get_system_info() -> Dict:
+    """
+    Build and return a dict containing system info.
+
+    :return: A dict with "System", "CPU" and "Memory" sub-dicts, plus a "GPU" list holding one
+        ``{"Name", "Total Memory"}`` entry for each GPU returned by ``GPUtil.getGPUs()``.
+    """
+    uname = platform.uname()
+    cpu_freq = psutil.cpu_freq()
+    # psutil.cpu_freq() returns None when no frequency information is available. Fall back to 0.0
+    # (the value psutil itself uses for an undeterminable max) instead of raising AttributeError.
+    max_frequency = cpu_freq.max if cpu_freq is not None else 0.0
+    virtual_mem = psutil.virtual_memory()
+    swap_mem = psutil.swap_memory()
+    gpus = GPUtil.getGPUs()
+    return {
+        "System": {
+            "OS": uname.system,
+            "OS Version": uname.version,
+            "Machine": uname.machine,
+            "Processor": uname.processor,
+        },
+        "CPU": {
+            "Physical Cores": psutil.cpu_count(logical=False),
+            "Total Cores": psutil.cpu_count(logical=True),
+            "Max Frequency": f"{max_frequency:.2f}Mhz",
+        },
+        "Memory": {"Total": get_size(virtual_mem.total), "Swap Total": get_size(swap_mem.total)},
+        "GPU": [{"Name": gpu.name, "Total Memory": f"{gpu.memoryTotal}MB"} for gpu in gpus],
+    }
--- a/copyright_clause_pre_commit_hook.py
+++ b/copyright_clause_pre_commit_hook.py
@@ -0,0 +1,154 @@
+# -*- coding: utf-8 -*-
+import datetime
+import sys
+from pathlib import Path
+
+# Constants
+# The clause carries the year the hook is run in, so a clause stamped with any other year no
+# longer matches and is rewritten.
+CURRENT_YEAR = datetime.date.today().year
+# Exact first line expected in Python files.
+COPYRIGHT_PY_STR = f"# © Crown-owned copyright {CURRENT_YEAR}, Defence Science and Technology Laboratory UK"
+# Exact three-line block expected in RST files: directive, blank line, indented clause.
+COPYRIGHT_RST_LINES = [
+    ".. only:: comment",
+    "",
+    f"    © Crown-owned copyright {CURRENT_YEAR}, Defence Science and Technology Laboratory UK",
+]
+# Directories searched recursively; the paths are relative to the current working directory.
+PATHS = {Path("./src"), Path("./tests"), Path("./docs"), Path("./benchmark")}
+# File suffixes the hook processes; files with any other suffix are skipped.
+EXTENSIONS = {".py", ".rst"}
+
+
+def _is_copyright_line(line: str) -> bool:
+    """
+    Tell whether a line looks like a Python copyright comment.
+
+    :param line: The line to inspect.
+    :return: True when the line begins with ``#`` and contains "copyright" in any letter case, False otherwise.
+    """
+    if not line.startswith("#"):
+        return False
+    return "copyright" in line.lower()
+
+
+def _is_rst_copyright_lines(lines: list) -> bool:
+    """
+    Tell whether the given lines open with an RST copyright block.
+
+    :param lines: The lines to inspect; only the first and third entries are examined.
+    :return: True when there are at least three lines, the first is exactly ``.. only:: comment`` and the
+        third contains "copyright" in any letter case, False otherwise.
+    """
+    if len(lines) < 3:
+        return False
+    directive, _, clause = lines[:3]
+    return directive == ".. only:: comment" and "copyright" in clause.lower()
+
+
+def process_py_file(file_path: Path) -> bool:
+    """
+    Process a Python file to check and add/update the copyright clause.
+
+    The clause is expected on the first line of the file. If the file starts with a ``#!`` shebang, that
+    line is left in place and the clause is checked for, or inserted, on the line directly after it.
+
+    :param file_path: The path to the file to check and update.
+    :return: True if the file was modified, False otherwise (including when processing failed).
+    """
+    modified = False
+    try:
+        content = file_path.read_text(encoding="utf-8")
+        lines = content.splitlines(keepends=True)  # Keep line endings
+
+        # A shebang only takes effect as the very first line, so the clause must go after it, not before.
+        clause_index = 1 if lines and lines[0].startswith("#!") else 0
+
+        if len(lines) > clause_index and _is_copyright_line(lines[clause_index]):
+            if lines[clause_index].strip() != COPYRIGHT_PY_STR:
+                lines[clause_index] = COPYRIGHT_PY_STR + "\n"
+                modified = True
+                print(f"Updated copyright clause in {file_path}")
+        else:
+            if clause_index and not lines[0].endswith("\n"):
+                # The shebang is the whole file and has no line ending; add one so the clause starts a new line.
+                lines[0] += "\n"
+            lines.insert(clause_index, COPYRIGHT_PY_STR + "\n")
+            modified = True
+            print(f"Added copyright clause to {file_path}")
+
+        if modified:
+            file_path.write_text("".join(lines), encoding="utf-8")
+    except Exception as e:
+        # Best effort: report the failure and carry on with the remaining files.
+        print(f"Failed to process {file_path}: {e}")
+        return False
+
+    return modified
+
+
+def process_rst_file(file_path: Path) -> bool:
+    """
+    Process an RST file to check and add/update the copyright clause.
+
+    The first line that is exactly ``.. only:: comment`` (ignoring surrounding whitespace) marks the start of
+    the block. If the three lines from there differ from the expected block they are replaced. If no such
+    line exists, the block plus a blank line is inserted at the top of the file.
+
+    :param file_path: The path to the file to check and update.
+    :return: True if the file was modified, False otherwise (including when processing failed).
+    """
+    modified = False
+    expected_block = [text + "\n" for text in COPYRIGHT_RST_LINES]
+    try:
+        content = file_path.read_text(encoding="utf-8")
+        lines = content.splitlines(keepends=True)  # Keep line endings
+
+        # Detect the block with the same exact-match test used to rewrite it. A looser substring test
+        # could mark a file as "has a block" without ever finding one to update, so the file was left
+        # without a clause.
+        block_start = next(
+            (index for index, line in enumerate(lines) if line.strip() == COPYRIGHT_RST_LINES[0]),
+            None,
+        )
+
+        if block_start is None:
+            # Insert new copyright block
+            lines = expected_block + ["\n"] + lines
+            modified = True
+            print(f"Added copyright clause to {file_path}")
+        else:
+            block_end = block_start + len(expected_block)
+            if lines[block_start:block_end] != expected_block:
+                # Update the incorrect block
+                lines[block_start:block_end] = expected_block
+                modified = True
+                print(f"Updated copyright clause in {file_path}")
+
+        if modified:
+            file_path.write_text("".join(lines), encoding="utf-8")
+    except Exception as e:
+        # Best effort: report the failure and carry on with the remaining files.
+        print(f"Failed to process {file_path}: {e}")
+        return False
+
+    return modified
+
+
+def process_file(file_path: Path) -> bool:
+    """
+    Route a file to the copyright handler for its suffix.
+
+    :param file_path: The path to the file to check and update.
+    :return: The handler's result (True if the file was modified), or False for any suffix other than
+        ``.py`` and ``.rst``.
+    """
+    handlers = {".py": process_py_file, ".rst": process_rst_file}
+    handler = handlers.get(file_path.suffix)
+    if handler is None:
+        return False
+    return handler(file_path)
+
+
+def main() -> int:
+    """
+    Walk every directory in PATHS and check each file whose suffix is in EXTENSIONS.
+
+    A summary line is printed only when at least one file was modified.
+
+    :return: 1 if any file was modified, 0 otherwise.
+    """
+    checked = 0
+    changed = 0
+    for root in PATHS:
+        candidates = (entry for entry in root.rglob("*") if entry.suffix in EXTENSIONS)
+        for candidate in candidates:
+            checked += 1
+            if process_file(candidate):
+                changed += 1
+
+    if changed == 0:
+        return 0
+    print(f"Files Checked: {checked}. Files Modified: {changed}")
+    return 1
+
+
+# The return value of main() becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())
--- a/diagram/classes.puml
+++ b/diagram/classes.puml
@@ -48,7 +48,7 @@ class "ActiveNode" as primaite.nodes.active_node.ActiveNode {
  file_system_state_actual : GOOD
  file_system_state_observed : REPAIRING, RESTORING, GOOD
  ip_address : str
-  patching_count : int
+  fixing_count : int
  software_state
  software_state : GOOD
  set_file_system_state(file_system_state: FileSystemState) -> None
@@ -353,10 +353,10 @@ class "SB3Agent" as primaite.agents.sb3.SB3Agent {
 }
 class "Service" as primaite.common.service.Service {
  name : str
-  patching_count : int
+  fixing_count : int
  port : str
  software_state : GOOD
-  reduce_patching_count() -> None
+  reduce_fixing_count() -> None
 }
 class "ServiceNode" as primaite.nodes.service_node.ServiceNode {
  services : Dict[str, Service]
@@ -455,7 +455,7 @@ class "TrainingConfig" as primaite.config.training_config.TrainingConfig {
  sb3_output_verbose_level
  scanning : float
  seed : Optional[int]
-  service_patching_duration : int
+  service_fixing_duration : int
  session_type
  time_delay : int
  from_dict(config_dict: Dict[str, Any]) -> TrainingConfig
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -1,3 +1,4 @@
+# © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
 # Minimal makefile for Sphinx documentation
 # You can set these variables from the command line, and also
 # from the environment for the first two.
@@ -6,17 +7,19 @@ SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = .
 BUILDDIR      = _build

-AUTOSUMMARY="source\_autosummary"
+AUTOSUMMARY="source/_autosummary"
+NOTEBOOKS="source/notebooks/notebooks"

 # Remove command is different depending on OS
 ifdef OS
-	RM = IF exist $(AUTOSUMMARY) (  RMDIR $(AUTOSUMMARY) /s /q )
+	RM = IF exist $(AUTOSUMMARY) (RMDIR $(AUTOSUMMARY) /s /q) & IF exist $(NOTEBOOKS) (RMDIR $(NOTEBOOKS) /s /q)
 else
   ifeq ($(shell uname), Linux)
-      RM = rm -rf $(AUTOSUMMARY)
+      RM = rm -rf $(AUTOSUMMARY) $(NOTEBOOKS)
   endif
 endif

+
 # Put it first so that "make" without argument is like "make help".
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -29,6 +32,5 @@ clean:
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile | clean
-	pip-licenses --format=rst --with-urls --output-file=source/primaite-dependencies.rst

 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/docs/_static/c2_sequence.png
+++ b/docs/_static/c2_sequence.png
--- a/docs/_static/component_relationship.png
+++ b/docs/_static/component_relationship.png
--- a/docs/_static/firewall_acl.png
+++ b/docs/_static/firewall_acl.png
--- a/docs/_static/four_node_two_switch_network.png
+++ b/docs/_static/four_node_two_switch_network.png
--- a/docs/_static/node_nic_link_component_diagram.png
+++ b/docs/_static/node_nic_link_component_diagram.png
--- a/docs/_static/notebooks/extensions.png
+++ b/docs/_static/notebooks/extensions.png
--- a/docs/_static/notebooks/install_extensions.png
+++ b/docs/_static/notebooks/install_extensions.png
--- a/docs/_static/primAITE_architecture.png
+++ b/docs/_static/primAITE_architecture.png
--- a/docs/_static/switched_p2p_network.png
+++ b/docs/_static/switched_p2p_network.png
--- a/docs/_templates/custom-class-template.rst
+++ b/docs/_templates/custom-class-template.rst
@@ -1,6 +1,6 @@
 .. only:: comment

-    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK

 ..
    Credit to https://github.com/JamesALeedham/Sphinx-Autosummary-Recursion for the custom templates.
@@ -12,7 +12,8 @@
 .. autoclass:: {{ objname }}
   :members:
   :show-inheritance:
-   :inherited-members:
+   :inherited-members: BaseModel
+   :exclude-members: model_computed_fields, model_config, model_fields
   :special-members: __init__, __call__, __add__, __mul__

   {% block methods %}
@@ -22,7 +23,14 @@
   .. autosummary::
      :nosignatures:
   {% for item in methods %}
-      {%- if not item.startswith('_') %}
+      {%- if not item.startswith('_') and item not in [
+         'construct', 'copy', 'dict', 'from_orm', 'json', 'model_construct',
+         'model_copy', 'model_dump', 'model_dump_json', 'model_json_schema',
+         'model_parametrized_name', 'model_post_init', 'model_rebuild', '',
+         'model_validate', 'model_validate_json', 'model_validate_strings',
+         'parse_file', 'parse_obj', 'parse_raw', 'schema', 'schema_json',
+         'update_forward_refs', 'validate',
+      ] %}
      ~{{ name }}.{{ item }}
      {%- endif -%}
   {%- endfor %}
@@ -35,7 +43,12 @@

   .. autosummary::
   {% for item in attributes %}
+      {%- if not item.startswith('_') and item not in [
+         'model_computed_fields', 'model_config', 'model_extra', 'model_fields',
+         'model_fields_set',
+      ] %}
      ~{{ name }}.{{ item }}
+      {%- endif -%}
   {%- endfor %}
   {% endif %}
   {% endblock %}
--- a/docs/_templates/custom-module-template.rst
+++ b/docs/_templates/custom-module-template.rst
@@ -1,6 +1,6 @@
 .. only:: comment

-    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK

 ..
    Credit to https://github.com/JamesALeedham/Sphinx-Autosummary-Recursion for the custom templates.
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -1,6 +1,8 @@
+:orphan:
+
 .. only:: comment

-    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK

 ..
   DO NOT DELETE THIS FILE! It contains the all-important `.. autosummary::` directive with `:recursive:` option, without
@@ -17,4 +19,3 @@
   :recursive:

   primaite
-   tests
--- a/docs/build-sphinx-docs-to-github-pages.sh
+++ b/docs/build-sphinx-docs-to-github-pages.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+#
+# Builds the Sphinx HTML docs and force-pushes them to the sphinx-docs-github-pages branch.
+# Reads GITHUB_ACTOR, GITHUB_TOKEN, GITHUB_REPOSITORY, GITHUB_SHA and GITHUB_REF from the environment.
+
+# Echo each command as it is executed.
+set -x
+
+# Install the tools used by the build and publish steps.
+apt-get update
+apt-get -y install git rsync python3-sphinx
+
+# Log the working directory and its contents. These are two separate commands; written on one
+# line, "ls -lah" was passed to pwd as arguments.
+pwd
+ls -lah
+
+# Timestamp of the latest commit; used further down to date the deployment commit message.
+export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct)
+
+##############
+# BUILD DOCS #
+##############
+
+cd docs
+# Python Sphinx, configured with source/conf.py
+# See https://www.sphinx-doc.org/
+make clean
+make html
+
+cd ..
+#######################
+# Update GitHub Pages #
+#######################
+
+git config --global user.name "${GITHUB_ACTOR}"
+git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com"
+
+# Scratch directory that becomes the root of the published branch
+docroot=$(mktemp -d)
+
+# Quoted so a working directory containing spaces is passed as a single path
+rsync -av "${PWD}/docs/_build/html/" "${docroot}/"
+
+pushd "${docroot}"
+
+git init
+# Command echoing is switched off while the token-bearing URL is on the command line,
+# so "set -x" does not write the token to the build log.
+set +x
+git remote add deploy "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"
+set -x
+git checkout -b sphinx-docs-github-pages
+
+# Adds .nojekyll file to the root to signal to GitHub that
+# directories that start with an underscore (_) can remain
+touch .nojekyll
+
+# Add README
+cat > README.md <<EOF
+# README for the Sphinx Docs GitHub Pages Branch
+This branch is simply a cache for the website served from https://Autonomous-Resilient-Cyber-Defence.github.io/PrimAITE/,
+and is  not intended to be viewed on github.com.
+For more information on how this site is built using Sphinx, Read the Docs, GitHub Actions/Pages, and demo
+implementation from https://github.com/annegentle, see:
+ * https://www.docslikecode.com/articles/github-pages-python-sphinx/
+ * https://tech.michaelaltfield.net/2020/07/18/sphinx-rtd-github-pages-1
+ * https://github.com/annegentle/create-demo
+EOF
+
+# Copy the resulting html pages built from Sphinx to the sphinx-docs-github-pages branch
+git add .
+
+# Make a commit with changes and any new files
+msg="Updating Docs for commit ${GITHUB_SHA} made on $(date -d"@${SOURCE_DATE_EPOCH}" --iso-8601=seconds) from ${GITHUB_REF} by ${GITHUB_ACTOR}"
+git commit -am "${msg}"
+
+# overwrite the contents of the sphinx-docs-github-pages branch on our github.com repo
+git push deploy sphinx-docs-github-pages --force
+
+popd # return to main repo sandbox root
+
+# exit cleanly
+exit 0
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,4 +1,4 @@
-# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+# © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
 # Configuration file for the Sphinx documentation builder.
 #
 # For the full list of built-in configuration values, see the documentation:
@@ -9,13 +9,15 @@ import datetime
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 import os
+import shutil
 import sys
+from pathlib import Path
+from typing import Any, List, Optional

 import furo  # noqa

 sys.path.insert(0, os.path.abspath("../"))

-
 # -- Project information -----------------------------------------------------
 year = datetime.datetime.now().year
 project = "PrimAITE"
@@ -28,6 +30,11 @@ with open("../src/primaite/VERSION", "r") as file:
 # The full version, including alpha/beta/rc tags
 release = version

+# set global variables
+rst_prolog = f"""
+.. |VERSION| replace::  {release}
+"""
+
 html_title = f"{project} v{release} docs"

 # -- General configuration ---------------------------------------------------
@@ -43,16 +50,136 @@ extensions = [
    "sphinx.ext.viewcode",  # Add a link to the Python source code for classes, functions etc.
    "sphinx.ext.todo",
    "sphinx_copybutton",  # Adds a copy button to code blocks
-    "sphinx_code_tabs",  # Enables tabbed code blocks
+    "nbsphinx",
 ]

-
 templates_path = ["_templates"]
-exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
-
+exclude_patterns = [
+    "_build",
+    "Thumbs.db",
+    ".DS_Store",
+]

 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

 html_theme = "furo"
 html_static_path = ["_static"]
+html_theme_options = {"globaltoc_collapse": True, "globaltoc_maxdepth": 2}
+html_copy_source = False
+nbsphinx_allow_errors = False  # set to True to take shortcuts
+html_scaled_image_link = False
+
+# make some stuff easier to read
+nbsphinx_prolog = """
+.. raw:: html
+
+    <style>
+        .stderr {
+            color: #000 !important
+        }
+    </style>
+"""
+
+
+def replace_token(app: Any, docname: Any, source: Any):
+    """
+    Substitute every configured token in a document's source text.
+
+    :param app: Object whose ``config.tokens`` maps each token to its replacement text.
+    :param docname: Name of the document being read (unused).
+    :param source: Single-element list holding the document text; the element is replaced in place.
+    """
+    text = source[0]
+    for token, replacement in app.config.tokens.items():
+        text = text.replace(token, replacement)
+    source[0] = text
+
+
+tokens = {
+    "{VERSION}": release,
+}  # Token VERSION is replaced by the value of the PrimAITE version in the version file
+"""Dict containing the tokens that need to be replaced in documentation."""
+
+
+def notebook_assets(ignored_files: Optional[List[str]] = None, include_file_types: Optional[List[str]] = None) -> Any:
+    """
+    Creates a function to be used with `shutil.copytree`'s `ignore` parameter.
+
+    :param ignored_files: A list of specific file names to ignore. If a file in the directory matches one of these
+    names, it will be excluded from the copy process.
+    :type ignored_files: Optional[List[str]]
+    :param include_file_types: A list of file extensions to include in the copy process. Files that do not match these
+    extensions will be excluded. If this list is empty, all files will be excluded, effectively copying only
+    directories.
+    :type include_file_types: Optional[List[str]]
+    :return: A callable taking ``(directory, contents)`` and returning the names to leave out of the copy.
+    """
+    # None defaults replace the shared mutable ``[]`` defaults; both mean "nothing specified".
+    ignored_names = set(ignored_files or [])
+    # Lower-cased once up front; str.endswith accepts a tuple of suffixes.
+    allowed_suffixes = tuple(ext.lower() for ext in (include_file_types or []))
+
+    def ignore_items(directory: str, contents: List[str]) -> List[str]:
+        """
+        Determines which files should be ignored during the copy process.
+
+        :param directory: The directory being copied.
+        :type directory: str
+        :param contents: A list of contents in the directory.
+        :type contents: List[str]
+        :return: A list of items to exclude from the copy process.
+        :rtype: List[str]
+        """
+        exclude_items = []
+
+        for item in contents:
+            # Directories are never excluded. The check is made against the directory being copied;
+            # testing the bare name against the working directory meant no file was ever filtered
+            # out by extension.
+            if (Path(directory) / item).is_dir():
+                continue
+
+            # With no allowed suffixes endswith(()) is False, so every file is excluded.
+            if item in ignored_names or not item.lower().endswith(allowed_suffixes):
+                exclude_items.append(item)
+
+        return exclude_items
+
+    return ignore_items
+
+
+def copy_notebooks_to_docs() -> Any:
+    """
+    Copy every notebook directory found under ``../src/primaite`` into ``source/notebooks``.
+
+    Each directory that contains at least one ``.ipynb`` file is copied to ``source/notebooks/<directory name>``,
+    merging into the destination if it already exists. Notebooks are discovered by searching rather than being
+    listed, so a newly added notebook needs no change here.
+    """
+    asset_suffixes = [".ipynb", ".png", ".svg"]
+
+    # dict.fromkeys drops repeated parent directories while keeping first-seen order.
+    source_dirs = list(dict.fromkeys(found.parent for found in Path("../src/primaite").rglob("*.ipynb")))
+
+    for source_dir in source_dirs:
+        destination = Path("source") / "notebooks" / source_dir.name
+        shutil.copytree(
+            src=source_dir,
+            dst=destination,
+            ignore=notebook_assets(include_file_types=asset_suffixes),
+            dirs_exist_ok=True,
+        )
+
+
+def suppress_log_output():
+    """Set the PrimAITE logger and its stream and file handlers to WARN for the documentation build."""
+    from primaite import _FILE_HANDLER, _LOGGER, _STREAM_HANDLER
+
+    for target in (_LOGGER, _STREAM_HANDLER, _FILE_HANDLER):
+        target.setLevel("WARN")
+
+
+def setup(app: Any):
+    """
+    Custom setup for sphinx.
+
+    :param app: The application object; it receives the ``tokens`` config value and the ``source-read`` handler.
+    """
+    suppress_log_output()
+    # Copy the notebooks and their assets into the docs tree.
+    copy_notebooks_to_docs()
+    # Register ``tokens`` as a config value defaulting to an empty dict; replace_token reads it from app.config.
+    app.add_config_value("tokens", {}, True)
+    # Run replace_token on every ``source-read`` event.
+    app.connect("source-read", replace_token)
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,65 +1,101 @@
 .. only:: comment

-    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK

 Welcome to PrimAITE's documentation
 ====================================

 What is PrimAITE?
------------------------
-
-PrimAITE (Primary-level AI Training Environment) is a simulation environment for training AI under the ARCD programme. It incorporates the functionality required of a Primary-level environment, as specified in the Dstl ARCD Training Environment Matrix document:
-
-* The ability to model a relevant platform / system context;
-* The ability to model key characteristics of a platform / system by representing connections, IP addresses, ports, traffic loading, operating systems, file system, services and processes;
-* Operates at machine-speed to enable fast training cycles.
-
-PrimAITE aims to evolve into an ARCD environment that could be used as the follow-on from Reception level approaches (e.g. YAWNING TITAN), and help bridge the Sim-to-Real gap into Secondary level environments (e.g. IMAGINARY YAK).
-
-This is similar to the approach taken by FVEY international partners (e.g. AUS CyBORG, US NSA FARLAND and CAN CyGil). These environments are referenced by the Dstl ARCD Agent Training Environments Knowledge Transfer document (TR141342).
-
-What is PrimAITE built with
--------------------------------------
-
-* `OpenAI's Gym <https://gym.openai.com/>`_ is used as the basis for AI blue agent interaction with the PrimAITE environment
-* `Networkx <https://github.com/networkx/networkx>`_ is used as the underlying data structure used for the PrimAITE environment
-* `Stable Baselines 3 <https://github.com/DLR-RM/stable-baselines3>`_ is used as a default source of RL algorithms (although PrimAITE is not limited to SB3 agents)
-* `Ray RLlib <https://github.com/ray-project/ray>`_ is used as an additional source of RL algorithms
-* `Typer <https://github.com/tiangolo/typer>`_ is used for building CLIs (Command Line Interface applications)
-* `Jupyterlab <https://github.com/jupyterlab/jupyterlab>`_ is used as an extensible environment for interactive and reproducible computing, based on the Jupyter Notebook Architecture
-* `Platformdirs <https://github.com/platformdirs/platformdirs>`_ is used for finding the right location to store user data and configuration but varies per platform
-* `Plotly <https://github.com/plotly/plotly.py>`_ is used for building high level charts
-
-
-Where next?
------------
-
-Head over to the :ref:`getting-started` page to install and setup PrimAITE!
+-----------------

 .. toctree::
   :maxdepth: 8
-   :caption: Contents:
+   :caption: About PrimAITE:
+   :hidden:
+
+   source/about
+   source/dependencies
+   source/glossary
+
+.. toctree::
+   :maxdepth: 8
+   :caption: How To:
+   :hidden:
+
+   source/how_to
+   source/how_to_guides/custom_actions
+   source/how_to_guides/custom_environments
+   source/how_to_guides/custom_rewards
+   source/how_to_guides/custom_software
+   source/how_to_guides/using_dev_cli
+   source/how_to_guides/extensible_actions
+   source/how_to_guides/extensible_agents
+   source/how_to_guides/extensible_nodes
+   source/how_to_guides/extensible_rewards
+   source/how_to_guides/primaite_yaml_migration_guide
+
+.. toctree::
+   :caption: Usage:
   :hidden:

   source/getting_started
-   source/about
+   source/game_layer
+   source/simulation
   source/config
-   source/primaite_session
-   source/custom_agent
-   PrimAITE API <source/_autosummary/primaite>
-   PrimAITE Tests <source/_autosummary/tests>
-   source/dependencies
-   source/glossary
-   source/migration_1.2_-_2.0
-
-
-.. TODO: Add project links once public repo has been created
+   source/rewards
+   source/varying_config_files
+   source/environment
+   source/action_masking
+   source/node_sets

 .. toctree::
-   :caption: Project Links:
+   :caption: Notebooks:
   :hidden:

-   Code <https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE>
-   Issues <https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/issues>
-   Pull Requests <https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/pulls>
-   Discussions <https://github.com/Autonomous-Resilient-Cyber-Defence/PrimAITE/discussions>
+   source/example_notebooks
+   source/notebooks/executed_notebooks
+
+.. toctree::
+   :caption: Developer information:
+   :hidden:
+
+   source/developer_tools
+   source/state_system
+   source/request_system
+   PrimAITE API <source/_autosummary/primaite>
+   PrimAITE Tests <source/_autosummary/tests>
+
+
+Overview
+^^^^^^^^
+
+The ARCD Primary-level AI Training Environment (**PrimAITE**) provides an effective simulation capability for training and evaluating AI in a cyber-defensive role. It incorporates the functionality required of a primary-level ARCD environment:
+
+- The ability to model a relevant system context;
+- Modelling an adversarial agent that the defensive agent can be trained and evaluated against;
+- The ability to model key characteristics of a system by representing hosts, servers, network devices, IP addresses, ports, operating systems, folders / files, applications, services and links;
+- Modelling background (green) pattern-of-life;
+- Operates at machine-speed to enable fast training cycles via Reinforcement Learning (RL).
+
+PrimAITE has been designed as an extensible environment and toolkit to support the development, test, training and evaluation of AI-based cyber defensive agents. Whilst PrimAITE ships with a number of example modelled scenarios (a.k.a. Use Cases), it has not been developed to mandate the solving of a single cyber challenge, and instead provides a highly flexible environment application that can be extended and reconfigured by the user to suit their specific cyber defence training and evaluation needs. PrimAITE provides default networks, red agent and green agent behaviour, reward functions, and action / observation space configuration, all of which can be utilised out of the box, but which ultimately can (and in some instances should) be built upon and / or reconfigured to meet the needs of different defensive agent developers. The PrimAITE user guide provides comprehensive instruction on all PrimAITE features, functionality and components, and can be consulted in order to help guide users in any reconfiguration or enhancements they wish to undertake; a library of example Jupyter notebooks is also provided to support such work.
+
+Features
+^^^^^^^^
+
+PrimAITE incorporates the following features:
+
+- Architected with a separate Simulation layer and Game layer. This separation of concerns defines a clear path towards transfer learning with environments of differing fidelity;
+- Ability to reconfigure an RL reward function based on (a) the ability to counter the modelled adversarial cyber-attack, and (b) the ability to ensure success for green agents;
+- Access Control List (ACL) functions for network devices (routers and firewalls), following standard ACL rule format (e.g., DENY / PERMIT, source / destination IP addresses, protocol and port);
+- Application of traffic to the links of the system laydown adheres to the ACL rulesets and routing tables contained within each network device;
+- Provides RL environments adherent to the Farama Foundation Gymnasium (previously OpenAI Gym) API, allowing integration with any compliant RL Agent frameworks;
+- Provides RL environments adherent to Ray RLlib environment specifications for single-agent and multi-agent scenarios;
+- Assessed for compatibility with Stable-Baselines3 (SB3), Ray RLlib, and bespoke agents;
+- Persona-based adversarial (Red) agent behaviour; several out-of-the-box personas are provided, and more can be developed to suit the needs of the task. Stochastic variations in Red agent behaviour are also included as required;
+- A robust system logging tool, automatically enabled at the node level and featuring various log levels and terminal output options, enables PrimAITE users to conduct in-depth network simulations;
+- A PCAP service is seamlessly integrated within the simulation, automatically capturing and logging frames for both
+  inbound and outbound traffic at the network interface level. This automatic functionality, combined with the ability
+  to separate traffic directions, significantly enhances network analysis and troubleshooting capabilities;
+- Agent action logs provide a description of every action taken by each agent during the episode. This includes timestep, action, parameters, request and response, for all Blue agent activity, which is aligned with the Track 2 Common Action / Observation Space (CAOS) format. Action logs also detail all scripted / stochastic red / green agent actions;
+- Environment ground truth is provided at every timestep, providing a full description of the environment’s true state;
+- Alignment with CAOS provides the ability to transfer agents between CAOS compliant environments.
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -1,4 +1,5 @@
@ECHO OFF
+REM  © Crown-owned copyright 2024, Defence Science and Technology Laboratory UK

 setlocal EnableDelayedExpansion

@@ -36,11 +37,6 @@ IF EXIST %AUTOSUMMARYDIR% (
    RMDIR %AUTOSUMMARYDIR% /s /q
 )

-REM print the YT licenses
-set LICENSEBUILD=pip-licenses --format=rst --with-urls
-set DEPS="%cd%\source\primaite-dependencies.rst"
-
-%LICENSEBUILD% --output-file=%DEPS%

 %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 goto end
--- a/docs/source/about.rst
+++ b/docs/source/about.rst
@@ -1,414 +1,72 @@
 .. only:: comment

-    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+    © Crown-owned copyright 2024, Defence Science and Technology Laboratory UK

 .. _about:

 About PrimAITE
 ==============

-Features
-********
+Architecture
+^^^^^^^^^^^^

-PrimAITE provides the following features:
+PrimAITE is a Python application and will operate on multiple Operating Systems (Windows, Linux and Mac);
+a comprehensive installation and user guide is provided with each release to support its usage.

-* A flexible network / system laydown based on the Python networkx framework
-* Nodes and links (edges) host Python classes in order to present attributes and methods (and hence, a more representative model of a platform / system)
-* A 'green agent' Information Exchange Requirement (IER) function allows the representation of traffic (protocols and loading) on any / all links. Application of IERs is based on the status of node operating systems and services
-* A 'green agent' node Pattern-of-Life (PoL) function allows the representation of core behaviours on nodes (e.g. changing the Hardware state, Software State, Service state, or File System state)
-* An Access Control List (ACL) function, mimicking the behaviour of a network firewall, is applied across the model, following standard ACL rule format (e.g. DENY/ALLOW, source IP, destination IP, protocol and port). Application of IERs adheres to any ACL restrictions
-* Presents an OpenAI Gym interface to the environment, allowing integration with any OpenAI Gym compliant defensive agents
-* Red agent activity based on 'red' IERs and 'red' PoL
-* Defined reward function for use with RL agents (based on nodes status, and green / red IER success)
-* Fully configurable (network / system laydown, IERs, node PoL, ACL, episode step period, episode max steps) and repeatable to suit the training requirements of agents. Therefore, not bound to a representation of any particular platform, system or technology
-* Full capture of discrete metrics relating to agent training (full system state, agent actions taken, average reward)
-* Networkx provides laydown visualisation capability
+Configuration of PrimAITE is achieved via included YAML files which support full control over the network / system laydown being modelled, background pattern of life, adversarial (red agent) behaviour, and step and episode count.
+A Simulation Controller layer manages the overall running of the simulation, keeping track of all low-level objects.

-Architecture - Nodes and Links
-******************************
+It is agnostic to the number of agents, their action / observation spaces, and the RL library being used.

-**Nodes**
+It presents a public API providing a method for describing the current state of the simulation, a method that accepts action requests and provides responses, and a method that triggers a timestep advancement.
+The Game Layer converts the simulation into a playable game for the agent(s).

-An inheritance model has been adopted in order to model nodes. All nodes have the following base attributes (Class: Node):
+It translates between simulation state and Gymnasium.Spaces to pass action / observation data between the agent(s) and the simulation. It is responsible for calculating rewards, managing Multi-Agent RL (MARL) action turns, and via a single agent interface can interact with Blue, Red and Green agents.

-* ID
-* Name
-* Type (e.g. computer, switch, RTU - enumeration)
-* Priority (P1, P2, P3, P4 or P5 - enumeration)
-* Hardware State (ON, OFF, RESETTING, SHUTTING_DOWN, BOOTING - enumeration)
+Agents can either generate their own scripted behaviour or accept input behaviour from an RL agent.

-Active Nodes also have the following attributes (Class: Active Node):
+Finally, a Gymnasium / Ray RLlib Environment Layer forwards requests to the Game Layer as the agent sends them. This layer also manages most of the I/O, such as reading in the configuration files and saving agent logs.

-* IP Address
-* Software State (GOOD, PATCHING, COMPROMISED - enumeration)
-* File System State (GOOD, CORRUPT, DESTROYED, REPAIRING, RESTORING - enumeration)
+.. image:: ../../_static/primAITE_architecture.png
+    :width: 500
+    :align: center

-Service Nodes also have the following attributes (Class: Service Node):

-* List of Services (where service is composed of service name and port). There is no theoretical limit on the number of services that can be modelled. Services and protocols are currently intrinsically linked (i.e. a service is an application on a node transmitting traffic of this protocol type)
-* Service state (GOOD, PATCHING, COMPROMISED, OVERWHELMED - enumeration)
+Training & Evaluation Capability
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-Passive Nodes are currently not used (but may be employed for non IP-based components such as machinery actuators in future releases).
+PrimAITE provides a training and evaluation capability to AI agents in the context of cyber-attack, via its Gymnasium / Ray RLlib compliant interface.

-**Links**
+Scenarios can be constructed to reflect network / system laydowns consisting of any configuration of nodes (e.g., PCs, servers etc.) and the networking equipment and links between them.

-Links are modelled both as network edges (networkx) and as Python classes, in order to extend their functionality. Links include the following attributes:
+All nodes can be configured to contain applications, services, folders, and files (and their status), including a powerful terminal simulation for SSH tunnelling and remote command execution.

-* ID
-* Name
-* Bandwidth (bits/s)
-* Source node ID
-* Destination node ID
-* Protocol list (containing the loading of protocols currently running on the link)
+Realistic network traffic is generated by software or by users. Packets move through the network devices (switches, routers, firewalls, network interfaces) in accordance with control rules such as internet protocols, Access Control Lists (ACLs), and routing tables.

-When the simulation runs, IERs are applied to the links in order to model traffic loading, individually assigned to each protocol. This allows green (background) and red agent behaviour to be modelled, and defensive agents to identify suspicious traffic patterns at a protocol / traffic loading level of fidelity.
+Highlights of PrimAITE's training and evaluation capability are:

-Information Exchange Requirements (IERs)
-****************************************
+- Fully configurable (network / system laydown, green pattern-of-life, red personas, reward function, ACL rules for each device, number of episodes / steps, action / observation space) and repeatable to suit the requirements of AI agents.
+- Domain randomisation through stochastic agent behaviour and the ability to switch between scenario variants between environment episodes.
+- Extensible through plugins to model any network behaviour.
+- Can integrate with any Gymnasium / Ray RLlib compliant AI agent.

-PrimAITE adopts the concept of Information Exchange Requirements (IERs) to model both green agent (background) and red agent (adversary) behaviour. IERs are used to initiate modelling of traffic loading on the network, and have the following attributes:

-* ID
-* Start step (i.e. which step in the training episode should the IER start)
-* End step (i.e. which step in the training episode should the IER end)
-* Source node ID
-* Destination node ID
-* Load (bits/s)
-* Protocol
-* Port
-* Running status (i.e. on / off)
+PrimAITE provides a number of use cases (network and red/green action configurations) by default which the user is able to extend and modify as required.

-The application of green agent IERs between a source and destination follows a number of rules. Specifically:
+What is PrimAITE built with
+---------------------------

-1. Does the current simulation time step fall between IER start and end step
-2. Is the source node operational (both physically and at an O/S level), and is the service (protocol / port) associated with the IER (a) present on this node, and (b) in an operational state (i.e. not PATCHING)
-3. Is the destination node operational (both physically and at an O/S level), and is the service (protocol / port) associated with the IER (a) present on this node, and (b) in an operational state (i.e. not PATCHING)
-4. Are there any Access Control List rules in place that prevent the application of this IER
-5. Are all switches in the (OSPF) path between source and destination operational (both physically and at an O/S level)
+* `Gymnasium <https://gymnasium.farama.org/>`_ is used as the basis for AI blue agent interaction with the PrimAITE environment
+* `Pydantic <https://docs.pydantic.dev/latest/>`_ is used for data validation
+* `Platformdirs <https://github.com/platformdirs/platformdirs>`_ is used for storing user data and configuration correctly between platforms
+* `Typer <https://github.com/tiangolo/typer>`_ is used for the Command Line Interface
+* `Jupyterlab <https://github.com/jupyterlab/jupyterlab>`_ is used as an extensible environment for interactive and reproducible computing, based on the Jupyter Notebook Architecture
+* `Plotly <https://github.com/plotly/plotly.py>`_ is used for building high level charts
+* `Stable Baselines 3 <https://github.com/DLR-RM/stable-baselines3>`_ is used for ensuring compatibility with RL libraries
+* `Ray RLlib <https://github.com/ray-project/ray>`_ is also used for ensuring compatibility with RL libraries

-For red agent IERs, the application of IERs between a source and destination follows a number of subtly different rules. Specifically:

-1. Does the current simulation time step fall between IER start and end step
-2. Is the source node operational, and is the service (protocol / port) associated with the IER (a) present on that node and (b) already in a compromised state
-3. Is the destination node operational, and is the service (protocol / port) associated with the IER present on that node
-4. Are there any Access Control List rules in place that prevent the application of this IER
-5. Are all switches in the (OSPF) path between source and destination operational (both physically and at an O/S level)
+Getting Started with PrimAITE
+-----------------------------

-Assuming the rules pass, the IER is applied to all relevant links (based on use of OSPF) between source and destination.
-
-Node Pattern-of-Life
-********************
-
-Every node can be impacted (i.e. have a status change applied to it) by either green agent pattern-of-life or red agent pattern-of-life. This is distinct from IERs, and allows for attacks (and defence) to be modelled purely within the confines of a node.
-
-The status changes that can be made to a node are as follows:
-
-* All Nodes:
-
-   * Hardware State:
-
-      * ON
-      * OFF
-      * RESETTING - when a status of resetting is entered, the node will automatically exit this state after a number of steps (as defined by the nodeResetDuration configuration item) after which it returns to an ON state
-      * BOOTING
-      * SHUTTING_DOWN
-
-* Active Nodes and Service Nodes:
-
-   * Software State:
-
-      * GOOD
-      * PATCHING - when a status of patching is entered, the node will automatically exit this state after a number of steps (as defined by the osPatchingDuration configuration item) after which it returns to a GOOD state
-      * COMPROMISED
-
-   * File System State:
-
-      * GOOD
-      * CORRUPT (can be resolved by repair or restore)
-      * DESTROYED (can be resolved by restore only)
-      * REPAIRING - when a status of repairing is entered, the node will automatically exit this state after a number of steps (as defined by the fileSystemRepairingLimit configuration item) after which it returns to a GOOD state
-      * RESTORING - when a status of repairing is entered, the node will automatically exit this state after a number of steps (as defined by the fileSystemRestoringLimit configuration item) after which it returns to a GOOD state
-
-* Service Nodes only:
-
-   * Service State (for any associated service):
-
-      * GOOD
-      * PATCHING - when a status of patching is entered, the service will automatically exit this state after a number of steps (as defined by the servicePatchingDuration configuration item) after which it returns to a GOOD state
-      * COMPROMISED
-      * OVERWHELMED
-
-Red agent pattern-of-life has an additional feature not found in the green pattern-of-life. This is the ability to influence the state of the attributes of a node via a number of different conditions:
-
-   * DIRECT:
-
-   The pattern-of-life described by the configuration file item will be applied regardless of any other conditions in the network. This is particularly useful for direct red agent entry into the network.
-
-   * IER:
-
-   The pattern-of-life described by the configuration file item will be applied to the service on the node, only if there is an IER of the same protocol / service type incoming at the specified timestep.
-
-   * SERVICE:
-
-   The pattern-of-life described by the configuration file item will be applied to the node based on the state of a service. The service can either be on the same node, or a different node within the network.
-
-Access Control List modelling
-*****************************
-
-An Access Control List (ACL) is modelled to provide the means to manage traffic flows in the system. This will allow defensive agents the means to turn on / off rules, or potentially create new rules, to counter an attack.
-
-The ACL follows a standard network firewall format. For example:
-
-.. list-table:: ACL example
-   :widths: 25 25 25 25 25
-   :header-rows: 1
-
-   * - Permission
-     - Source IP
-     - Dest IP
-     - Protocol
-     - Port
-   * - DENY
-     - 192.168.1.2
-     - 192.168.1.3
-     - HTTPS
-     - 443
-   * - ALLOW
-     - 192.168.1.4
-     - ANY
-     - SMTP
-     - 25
-   * - DENY
-     - ANY
-     - 192.168.1.5
-     - ANY
-     - ANY
-
-All ACL rules are considered when applying an IER. Logic follows the order of rules, so a DENY or ALLOW for the same parameters will override an earlier entry.
-
-Observation Spaces
-******************
-The observation space provides the blue agent with information about the current status of nodes and links.
-
-PrimAITE builds on top of Gym Spaces to create an observation space that is easily configurable for users. It's made up of components which are managed by the :py:class:`primaite.environment.observations.ObservationsHandler`. Each training scenario can define its own observation space, and the user can choose which information to inlude, and how it should be formatted.
-
-NodeLinkTable component
-----------------------
-For example, the :py:class:`primaite.environment.observations.NodeLinkTable` component represents the status of nodes and links as a ``gym.spaces.Box`` with an example format shown below:
-
-An example observation space is provided below:
-
-.. list-table:: Observation Space example
-   :widths: 25 25 25 25 25 25 25
-   :header-rows: 1
-
-   * -
-     - ID
-     - Hardware State
-     - Software State
-     - File System State
-     - Service / Protocol A
-     - Service / Protocol B
-   * - Node A
-     - 1
-     - 1
-     - 1
-     - 1
-     - 1
-     - 1
-   * - Node B
-     - 2
-     - 1
-     - 3
-     - 1
-     - 1
-     - 1
-   * - Node C
-     - 3
-     - 2
-     - 1
-     - 1
-     - 3
-     - 2
-   * - Link 1
-     - 5
-     - 0
-     - 0
-     - 0
-     - 0
-     - 10000
-   * - Link 2
-     - 6
-     - 0
-     - 0
-     - 0
-     - 0
-     - 10000
-   * - Link 3
-     - 7
-     - 0
-     - 0
-     - 0
-     - 5000
-     - 0
-
-For the nodes, the following values are represented:
-
-.. code-block::
-
-  [
-    ID
-    Hardware State            (1=ON,   2=OFF,  3=RESETTING,  4=SHUTTING_DOWN, 5=BOOTING)
-    Operating System State    (0=none, 1=GOOD, 2=PATCHING,   3=COMPROMISED)
-    File System State         (0=none, 1=GOOD, 2=CORRUPT,    3=DESTROYED,  4=REPAIRING, 5=RESTORING)
-    Service1/Protocol1 state  (0=none, 1=GOOD, 2=PATCHING,   3=COMPROMISED)
-    Service2/Protocol2 state  (0=none, 1=GOOD, 2=PATCHING,   3=COMPROMISED)
-  ]
-
-(Note that each service available in the network is provided as a column, although not all nodes may utilise all services)
-
-For the links, the following statuses are represented:
-
-.. code-block::
-
-  [
-    ID
-    Hardware State            (0=not applicable)
-    Operating System State    (0=not applicable)
-    File System State         (0=not applicable)
-    Service1/Protocol1 state  (Traffic load from this protocol on this link)
-    Service2/Protocol2 state  (Traffic load from this protocol on this link)
-  ]
-
-NodeStatus component
----------------------
-This is a MultiDiscrete observation space that can be though of as a one-dimensional vector of discrete states.
-The example above would have the following structure:
-
-.. code-block::
-
-  [
-    node1_info
-    node2_info
-    node3_info
-  ]
-
-Each ``node_info`` contains the following:
-
-.. code-block::
-
-  [
-    hardware_state    (0=none, 1=ON,   2=OFF,      3=RESETTING, 4=SHUTTING_DOWN, 5=BOOTING)
-    software_state    (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED)
-    file_system_state (0=none, 1=GOOD, 2=CORRUPT,  3=DESTROYED, 4=REPAIRING, 5=RESTORING)
-    service1_state    (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED)
-    service2_state    (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED)
-  ]
-
-In a network with three nodes and two services, the full observation space would have 15 elements. It can be written with ``gym`` notation to indicate the number of discrete options for each of the elements of the observation space. For example:
-
-.. code-block::
-
-  gym.spaces.MultiDiscrete([4,5,6,4,4,4,5,6,4,4,4,5,6,4,4])
-
-.. note::
-  NodeStatus observation component provides information only about nodes. Links are not considered.
-
-LinkTrafficLevels
-----------------
-This component is a MultiDiscrete space showing the traffic flow levels on the links in the network, after applying a threshold to convert it from a continuous to a discrete value.
-There are two configurable parameters:
-* ``quantisation_levels`` determines how many discrete bins to use for converting the continuous traffic value to discrete (default is 5).
-* ``combine_service_traffic`` determines whether to separately output traffic use for each network protocol or whether to combine them into an overall value for the link. (default is ``True``)
-
-For example, with default parameters and a network with three links, the structure of this component would be:
-
-.. code-block::
-
-  [
-    link1_status
-    link2_status
-    link3_status
-  ]
-
-Each ``link_status`` is a number from 0-4 representing the network load in relation to bandwidth.
-
-.. code-block::
-
-  0 = No traffic (0%)
-  1 = low traffic (1%-33%)
-  2 = medium traffic (33%-66%)
-  3 = high traffic (66%-99%)
-  4 = max traffic/ overwhelmed (100%)
-
-Using ``gym`` notation, the shape of the obs space is: ``gym.spaces.MultiDiscrete([5,5,5])``.
-
-
-Action Spaces
-**************
-
-The action space available to the blue agent comes in two types:
-
- 1. Node-based
- 2. Access Control List
- 3. Any (Agent can take both node-based and ACL-based actions)
-
-The choice of action space used during a training session is determined in the config_[name].yaml file.
-
-**Node-Based**
-
-The agent is able to influence the status of nodes by switching them off, resetting, or patching operating systems and services. In this instance, the action space is an OpenAI Gym spaces.Discrete type, as follows:
-
- * Dictionary item {... ,1: [x1, x2, x3,x4] ...}
-   The placeholders inside the list under the key '1' mean the following:
-
-    * [0, num nodes] - Node ID (0 = nothing, node ID)
-    * [0, 4] - What property it's acting on (0 = nothing, 1 = state, 2 = SoftwareState, 3 = service state, 4 = file system state)
-    * [0, 3] - Action on property (0 = nothing, 1 = on / scan, 2 = off / repair, 3 = reset / patch / restore)
-    * [0, num services] - Resolves to service ID (0 = nothing, resolves to service)
-
-**Access Control List**
-
-The blue agent is able to influence the configuration of the Access Control List rule set (which implements a system-wide firewall). In this instance, the action space is an OpenAI spaces.Discrete type, as follows:
-
-   * Dictionary item {... ,1: [x1, x2, x3, x4, x5, x6] ...}
-   The placeholders inside the list under the key '1' mean the following:
-
-     * [0, 2] - Action (0 = do nothing, 1 = create rule, 2 = delete rule)
-     * [0, 1] - Permission (0 = DENY, 1 = ALLOW)
-     * [0, num nodes] - Source IP (0 = any, then 1 -> x resolving to IP addresses)
-     * [0, num nodes] - Dest IP (0 = any, then 1 -> x resolving to IP addresses)
-     * [0, num services] - Protocol (0 = any, then 1 -> x resolving to protocol)
-     * [0, num ports] - Port (0 = any, then 1 -> x resolving to port)
-
-**ANY**
-The agent is able to carry out both **Node-Based** and **Access Control List** operations.
-
-This means the dictionary will contain key-value pairs in the format of BOTH Node-Based and Access Control List as seen above.
-
-Rewards
-*******
-
-A reward value is presented back to the blue agent on the conclusion of every step. The reward value is calculated via two methods which combine to give the total value:
-
- 1. Node and service status
- 2. IER status
-
-**Node and service status**
-
-On every step, the status of each node is compared against both a reference environment (simulating the situation if the red and blue agents had not impacted the environment)
-and the before and after state of the environment. If the comparison against the reference environment shows no difference, then the score provided is "AllOK". If there is a
-difference with respect to the reference environment, the before and after states are compared, and a score determined. See :ref:`config` for details of reward values.
-
-**IER status**
-
-On every step, the full IER set is examined to determine whether green and red agent IERs are being permitted to run. Any red agent IERs running incur a penalty; any green agent
-IERs not permitted to run also incur a penalty. See :ref:`config` for details of reward values.
-
-Future Enhancements
-*******************
-
-The PrimAITE project has an ambition to include the following enhancements in future releases:
-
-* Integration with a suitable standardised framework to allow multi-agent integration
-* Integration with external threat emulation tools, either using off-line data, or integrating at runtime
+Head over to the :ref:`getting-started` page to install and set up PrimAITE!
--- a/docs/source/action_masking.rst
+++ b/docs/source/action_masking.rst
@@ -0,0 +1,148 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+.. _action_masking:
+
+Action Masking
+**************
+The PrimAITE simulation is able to provide action masks in the environment output. These action masks let the agents know
+about which actions are invalid based on the current environment state. For instance, it's not possible to install
+software on a node that is turned off. Therefore, if an agent has a ``node-software-install`` in its action map for that node,
+the action mask will show ``0`` in the corresponding entry.
+
+*Note: just because an action is available in the action mask does not mean it will be successful when executed. It just means it's possible to try to execute the action at this time.*
+
+Configuration
+=============
+Action masking is supported for agents that use the ``ProxyAgent`` class (the class used for connecting to RL algorithms).
+In order to use action masking, set the ``agent_settings.action_masking`` parameter to ``True`` in the config file.
+
+Masking Logic
+=============
+The following logic is applied:
+
++------------------------------------------+------------------------------------------------+
+| Action                                   | Action Mask Logic                              |
++==========================================+================================================+
+| **do-nothing**                           | Always Possible.                               |
++------------------------------------------+------------------------------------------------+
+| **node-service-scan**                    | Node is on. Service is running.                |
++------------------------------------------+------------------------------------------------+
+| **node-service-stop**                    | Node is on. Service is running.                |
++------------------------------------------+------------------------------------------------+
+| **node-service-start**                   | Node is on. Service is stopped.                |
++------------------------------------------+------------------------------------------------+
+| **node-service-pause**                   | Node is on. Service is running.                |
++------------------------------------------+------------------------------------------------+
+| **node-service-resume**                  | Node is on. Service is paused.                 |
++------------------------------------------+------------------------------------------------+
+| **node-service-restart**                 | Node is on. Service is running.                |
++------------------------------------------+------------------------------------------------+
+| **node-service-disable**                 | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-service-enable**                  | Node is on. Service is disabled.               |
++------------------------------------------+------------------------------------------------+
+| **node-service-fix**                     | Node is on. Service is running.                |
++------------------------------------------+------------------------------------------------+
+| **node-application-execute**             | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-application-scan**                | Node is on. Application is running.            |
++------------------------------------------+------------------------------------------------+
+| **node-application-close**               | Node is on. Application is running.            |
++------------------------------------------+------------------------------------------------+
+| **node-application-fix**                 | Node is on. Application is running.            |
++------------------------------------------+------------------------------------------------+
+| **node-application-install**             | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-application-remove**              | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-file-scan**                       | Node is on. File exists. File not deleted.     |
++------------------------------------------+------------------------------------------------+
+| **node-file-create**                     | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-file-checkhash**                  | Node is on. File exists. File not deleted.     |
++------------------------------------------+------------------------------------------------+
+| **node-file-delete**                     | Node is on. File exists.                       |
++------------------------------------------+------------------------------------------------+
+| **node-file-repair**                     | Node is on. File exists. File not deleted.     |
++------------------------------------------+------------------------------------------------+
+| **node-file-restore**                    | Node is on. File exists. File is deleted.      |
++------------------------------------------+------------------------------------------------+
+| **node-file-corrupt**                    | Node is on. File exists. File not deleted.     |
++------------------------------------------+------------------------------------------------+
+| **node-file-access**                     | Node is on. File exists. File not deleted.     |
++------------------------------------------+------------------------------------------------+
+| **node-folder-create**                   | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-folder-scan**                     | Node is on. Folder exists. Folder not deleted. |
++------------------------------------------+------------------------------------------------+
+| **node-folder-checkhash**                | Node is on. Folder exists. Folder not deleted. |
++------------------------------------------+------------------------------------------------+
+| **node-folder-repair**                   | Node is on. Folder exists. Folder not deleted. |
++------------------------------------------+------------------------------------------------+
+| **node-folder-restore**                  | Node is on. Folder exists. Folder is deleted.  |
++------------------------------------------+------------------------------------------------+
+| **node-os-scan**                         | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **host-nic-enable**                      | NIC is disabled. Node is on.                   |
++------------------------------------------+------------------------------------------------+
+| **host-nic-disable**                     | NIC is enabled. Node is on.                    |
++------------------------------------------+------------------------------------------------+
+| **node-shutdown**                        | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-startup**                         | Node is off.                                   |
++------------------------------------------+------------------------------------------------+
+| **node-reset**                           | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-nmap-ping-scan**                  | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-nmap-port-scan**                  | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-network-service-recon**           | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **network-port-enable**                  | Node is on. Router is on.                      |
++------------------------------------------+------------------------------------------------+
+| **network-port-disable**                 | Router is on.                                  |
++------------------------------------------+------------------------------------------------+
+| **router-acl-add-rule**                  | Router is on.                                  |
++------------------------------------------+------------------------------------------------+
+| **router-acl-remove-rule**               | Router is on.                                  |
++------------------------------------------+------------------------------------------------+
+| **firewall-acl-add-rule**                | Firewall is on.                                |
++------------------------------------------+------------------------------------------------+
+| **firewall-acl-remove-rule**             | Firewall is on.                                |
++------------------------------------------+------------------------------------------------+
+| **configure-database-client**            | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **configure-ransomware-script**          | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **c2-server-ransomware-configure**       | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **configure-dos-bot**                    | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **configure-c2-beacon**                  | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **c2-server-ransomware-launch**          | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **c2-server-terminal-command**           | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **c2-server-data-exfiltrate**            | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-account-change-password**         | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-session-remote-login**            | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-session-remote-logoff**           | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+| **node-send-remote-command**             | Node is on.                                    |
++------------------------------------------+------------------------------------------------+
+
+
+Mechanism
+=========
+The environment iterates over the RL agent's ``action_map`` and generates the corresponding simulator :ref:`request <request_system>` string. It uses the :py:meth:`RequestManager.check_valid()<primaite.simulator.core.RequestManager.check_valid>` method to invoke the relevant :py:class:`RequestPermissionValidator <primaite.simulator.core.RequestPermissionValidator>` without actually running the request on the simulation.
+
+Current Limitations
+===================
+Currently, action masking only considers whether the action as a whole is possible; it doesn't verify that the exact parameter combination passed to the action makes sense in the current context. For instance, if ACL rule 3 on router_1 is already populated, the action for adding another rule at position 3 will be available regardless, as long as that router is turned on. This will never block valid actions. It will just occasionally allow invalid actions.
--- a/docs/source/config.rst
+++ b/docs/source/config.rst
@@ -1,489 +1,43 @@
 .. only:: comment

-    © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK

-.. _config:
+.. _Configurable_Items:

-The Config Files Explained
-==========================
+PrimAITE |VERSION| Configuration
+********************************

-PrimAITE uses two configuration files for its operation:
+PrimAITE uses YAML configuration files to define everything needed to create the training environment for RL agents, including the network, the scripted agents, and the RL agent's action space, observation space, and reward function.

-* **The Training Config**
+Example Configuration Hierarchy
+###############################
+The top-level configuration items in a configuration file are as follows:

-    Used to define the top-level settings of the PrimAITE environment, the reward values, and the session that is to be run.
+.. code-block:: yaml

-* **The Lay Down Config**
+    io_settings:
+    ...
+    game:
+    ...
+    agents:
+    ...
+    simulation:
+    ...

-    Used to define the low-level settings of a session, including the network laydown, green / red agent information exchange requirements (IERSs) and Access Control Rules.
+These are expanded upon in the Configurable items section below.

-Training Config:
-*******************
+Configurable items
+##################

-The Training Config file consists of the following attributes:
+.. toctree::
+   :maxdepth: 1

-**Generic Config Values**
+   configuration/io_settings.rst
+   configuration/game.rst
+   configuration/agents.rst
+   configuration/simulation.rst

+Varying The Configuration Each Episode
+######################################

-* **agent_framework** [enum]
-
-    This identifies the agent framework to be used to instantiate the agent algorithm. Select from one of the following:
-
-    * NONE - Where a user developed agent is to be used
-    * SB3 - Stable Baselines3
-    * RLLIB - Ray RLlib.
-
-* **agent_identifier**
-
-    This identifies the agent to use for the session. Select from one of the following:
-
-    * A2C - Advantage Actor Critic
-    * PPO - Proximal Policy Optimization
-    * HARDCODED - A custom built deterministic agent
-    * RANDOM - A Stochastic random agent
-
-
-* **random_red_agent** [bool]
-
-    Determines if the session should be run with a random red agent
-
-* **action_type** [enum]
-
-    Determines whether a NODE, ACL, or ANY (combined NODE & ACL) action space format is adopted for the session
-
-
-* **OBSERVATION_SPACE** [dict]
-
-    Allows for user to configure observation space by combining one or more observation components. List of available
-    components is in :py:mod:`primaite.environment.observations`.
-
-    The observation space config item should have a ``components`` key which is a list of components. Each component
-    config must have a ``name`` key, and can optionally have an ``options`` key. The ``options`` are passed to the
-    component while it is being initialised.
-
-    This example illustrates the correct format for the observation space config item
-
-    .. code-block:: yaml
-
-        observation_space:
-        components:
-          - name: NODE_LINK_TABLE
-          - name: NODE_STATUSES
-          - name: LINK_TRAFFIC_LEVELS
-          - name: ACCESS_CONTROL_LIST
-            options:
-              combine_service_traffic : False
-              quantisation_levels: 99
-
-
-    Currently available components are:
-
-      * :py:mod:`NODE_LINK_TABLE<primaite.environment.observations.NodeLinkTable>` this does not accept any additional options
-      * :py:mod:`NODE_STATUSES<primaite.environment.observations.NodeStatuses>`, this does not accept any additional options
-      * :py:mod:`ACCESS_CONTROL_LIST<primaite.environment.observations.AccessControlList>`, this does not accept additional options
-      * :py:mod:`LINK_TRAFFIC_LEVELS<primaite.environment.observations.LinkTrafficLevels>`, this accepts the following options:
-
-        * ``combine_service_traffic`` - whether to consider bandwidth use separately for each network protocol or combine them into a single bandwidth reading (boolean)
-        * ``quantisation_levels`` - how many discrete bandwidth usage levels to use for encoding. This can be an integer equal to or greater than 3.
-
-    The other configurable item is ``flatten`` which is false by default. When set to true, the observation space is flattened (turned into a 1-D vector). You should use this if your RL agent does not natively support observation space types like ``gym.Spaces.Tuple``.
-
-* **num_train_episodes** [int]
-
-    This defines the number of episodes that the agent will train for.
-
-
-* **num_train_steps** [int]
-
-    Determines the number of steps to run in each episode of the training session.
-
-
-* **num_eval_episodes** [int]
-
-    This defines the number of episodes that the agent will be evaluated over.
-
-
-* **num_eval_steps** [int]
-
-    Determines the number of steps to run in each episode of the evaluation session.
-
-
-* **time_delay** [int]
-
-    The time delay (in milliseconds) to take between each step when running a GENERIC agent session
-
-
-* **session_type** [text]
-
-    Type of session to be run (TRAINING, EVALUATION, or BOTH)
-
-* **load_agent** [bool]
-
-    Determine whether to load an agent from file
-
-* **agent_load_file** [text]
-
-    File path and file name of agent if you're loading one in
-
-* **observation_space_high_value** [int]
-
-    The high value to use for values in the observation space. This is set to 1000000000 by default, and should not need changing in most cases
-
-* **implicit_acl_rule** [str]
-
-    Determines which Explicit rule the ACL list has - two options are: DENY or ALLOW.
-
-* **max_number_acl_rules** [int]
-
-    Sets a limit on how many ACL rules there can be in the ACL list throughout the training session.
-
-**Reward-Based Config Values**
-
-Rewards are calculated based on the difference between the current state and reference state (the 'should be' state) of the environment.
-
-* **Generic [all_ok]** [float]
-
-    The score to give when the current situation (for a given component) is no different from that expected in the baseline (i.e. as though no blue or red agent actions had been undertaken)
-
-* **Node Hardware State [off_should_be_on]** [float]
-
-    The score to give when the node should be on, but is off
-
-* **Node Hardware State [off_should_be_resetting]** [float]
-
-    The score to give when the node should be resetting, but is off
-
-* **Node Hardware State [on_should_be_off]** [float]
-
-    The score to give when the node should be off, but is on
-
-* **Node Hardware State [on_should_be_resetting]** [float]
-
-    The score to give when the node should be resetting, but is on
-
-* **Node Hardware State [resetting_should_be_on]** [float]
-
-    The score to give when the node should be on, but is resetting
-
-* **Node Hardware State [resetting_should_be_off]** [float]
-
-    The score to give when the node should be off, but is resetting
-
-* **Node Hardware State [resetting]** [float]
-
-    The score to give when the node is resetting
-
-* **Node Operating System or Service State [good_should_be_patching]** [float]
-
-    The score to give when the state should be patching, but is good
-
-* **Node Operating System or Service State [good_should_be_compromised]** [float]
-
-    The score to give when the state should be compromised, but is good
-
-* **Node Operating System or Service State [good_should_be_overwhelmed]** [float]
-
-    The score to give when the state should be overwhelmed, but is good
-
-* **Node Operating System or Service State [patching_should_be_good]** [float]
-
-    The score to give when the state should be good, but is patching
-
-* **Node Operating System or Service State [patching_should_be_compromised]** [float]
-
-    The score to give when the state should be compromised, but is patching
-
-* **Node Operating System or Service State [patching_should_be_overwhelmed]** [float]
-
-    The score to give when the state should be overwhelmed, but is patching
-
-* **Node Operating System or Service State [patching]** [float]
-
-    The score to give when the state is patching
-
-* **Node Operating System or Service State [compromised_should_be_good]** [float]
-
-    The score to give when the state should be good, but is compromised
-
-* **Node Operating System or Service State [compromised_should_be_patching]** [float]
-
-    The score to give when the state should be patching, but is compromised
-
-* **Node Operating System or Service State [compromised_should_be_overwhelmed]** [float]
-
-    The score to give when the state should be overwhelmed, but is compromised
-
-* **Node Operating System or Service State [compromised]** [float]
-
-    The score to give when the state is compromised
-
-* **Node Operating System or Service State [overwhelmed_should_be_good]** [float]
-
-    The score to give when the state should be good, but is overwhelmed
-
-* **Node Operating System or Service State [overwhelmed_should_be_patching]** [float]
-
-    The score to give when the state should be patching, but is overwhelmed
-
-* **Node Operating System or Service State [overwhelmed_should_be_compromised]** [float]
-
-    The score to give when the state should be compromised, but is overwhelmed
-
-* **Node Operating System or Service State [overwhelmed]** [float]
-
-    The score to give when the state is overwhelmed
-
-* **Node File System State [good_should_be_repairing]** [float]
-
-    The score to give when the state should be repairing, but is good
-
-* **Node File System State [good_should_be_restoring]** [float]
-
-    The score to give when the state should be restoring, but is good
-
-* **Node File System State [good_should_be_corrupt]** [float]
-
-    The score to give when the state should be corrupt, but is good
-
-* **Node File System State [good_should_be_destroyed]** [float]
-
-    The score to give when the state should be destroyed, but is good
-
-* **Node File System State [repairing_should_be_good]** [float]
-
-    The score to give when the state should be good, but is repairing
-
-* **Node File System State [repairing_should_be_restoring]** [float]
-
-    The score to give when the state should be restoring, but is repairing
-
-* **Node File System State [repairing_should_be_corrupt]** [float]
-
-    The score to give when the state should be corrupt, but is repairing
-
-* **Node File System State [repairing_should_be_destroyed]** [float]
-
-    The score to give when the state should be destroyed, but is repairing
-
-* **Node File System State [repairing]** [float]
-
-    The score to give when the state is repairing
-
-* **Node File System State [restoring_should_be_good]** [float]
-
-    The score to give when the state should be good, but is restoring
-
-* **Node File System State [restoring_should_be_repairing]** [float]
-
-    The score to give when the state should be repairing, but is restoring
-
-* **Node File System State [restoring_should_be_corrupt]** [float]
-
-    The score to give when the state should be corrupt, but is restoring
-
-* **Node File System State [restoring_should_be_destroyed]** [float]
-
-    The score to give when the state should be destroyed, but is restoring
-
-* **Node File System State [restoring]** [float]
-
-    The score to give when the state is restoring
-
-* **Node File System State [corrupt_should_be_good]** [float]
-
-    The score to give when the state should be good, but is corrupt
-
-* **Node File System State [corrupt_should_be_repairing]** [float]
-
-    The score to give when the state should be repairing, but is corrupt
-
-* **Node File System State [corrupt_should_be_restoring]** [float]
-
-    The score to give when the state should be restoring, but is corrupt
-
-* **Node File System State [corrupt_should_be_destroyed]** [float]
-
-    The score to give when the state should be destroyed, but is corrupt
-
-* **Node File System State [corrupt]** [float]
-
-    The score to give when the state is corrupt
-
-* **Node File System State [destroyed_should_be_good]** [float]
-
-    The score to give when the state should be good, but is destroyed
-
-* **Node File System State [destroyed_should_be_repairing]** [float]
-
-    The score to give when the state should be repairing, but is destroyed
-
-* **Node File System State [destroyed_should_be_restoring]** [float]
-
-    The score to give when the state should be restoring, but is destroyed
-
-* **Node File System State [destroyed_should_be_corrupt]** [float]
-
-    The score to give when the state should be corrupt, but is destroyed
-
-* **Node File System State [destroyed]** [float]
-
-    The score to give when the state is destroyed
-
-* **Node File System State [scanning]** [float]
-
-    The score to give when the state is scanning
-
-* **IER Status [red_ier_running]** [float]
-
-    The score to give when a red agent IER is permitted to run
-
-* **IER Status [green_ier_blocked]** [float]
-
-    The score to give when a green agent IER is prevented from running
-
-**Patching / Reset Durations**
-
-* **os_patching_duration** [int]
-
-    The number of steps to take when patching an Operating System
-
-* **node_reset_duration** [int]
-
-    The number of steps to take when resetting a node's hardware state
-
-* **service_patching_duration** [int]
-
-    The number of steps to take when patching a service
-
-* **file_system_repairing_limit** [int]:
-
-    The number of steps to take when repairing the file system
-
-* **file_system_restoring_limit** [int]
-
-    The number of steps to take when restoring the file system
-
-* **file_system_scanning_limit** [int]
-
-    The number of steps to take when scanning the file system
-
-* **deterministic** [bool]
-
-   Set to true if the agent evaluation should be deterministic. Default is ``False``
-
-* **seed** [int]
-
-   Seed used in the randomisation in agent training. Default is ``None``
-
-The Lay Down Config
-*******************
-
-The lay down config file consists of the following attributes:
-
-
-* **itemType: STEPS** [int]
-
-* **item_type: PORTS** [int]
-
-    Provides a list of ports modelled in this session
-
-* **item_type: SERVICES** [freetext]
-
-    Provides a list of services modelled in this session
-
-* **item_type: NODE**
-
-    Defines a node included in the system laydown being simulated. It should consist of the following attributes:
-
-     * **id** [int]: Unique ID for this YAML item
-     * **name** [freetext]: Human-readable name of the component
-     * **node_class** [enum]: Relates to the base type of the node. Can be SERVICE, ACTIVE or PASSIVE. PASSIVE nodes do not have an operating system or services. ACTIVE nodes have an operating system, but no services. SERVICE nodes have both an operating system and one or more services
-     * **node_type** [enum]: Relates to the component type. Can be one of CCTV, SWITCH, COMPUTER, LINK, MONITOR, PRINTER, LOP, RTU, ACTUATOR or SERVER
-     * **priority** [enum]: Provides a priority for each node. Can be one of P1, P2, P3, P4 or P5 (which P1 being the highest)
-     * **hardware_state** [enum]: The initial hardware state of the node. Can be one of ON, OFF or RESETTING
-     * **ip_address** [IP address]: The IP address of the component in format xxx.xxx.xxx.xxx
-     * **software_state** [enum]: The intial state of the node operating system. Can be GOOD, PATCHING or COMPROMISED
-     * **file_system_state** [enum]: The initial state of the node file system. Can be GOOD, CORRUPT, DESTROYED, REPAIRING or RESTORING
-     * **services**: For each service associated with the node:
-
-        * **name** [freetext]: Free-text name of the service, but must match one of the services defined for the system in the services list
-        * **port** [int]: Integer value of the port related to this service, but must match one of the ports defined for the system in the ports list
-        * **state** [enum]: The initial state of the service. Can be one of GOOD, PATCHING, COMPROMISED or OVERWHELMED
-
-* **item_type: LINK**
-
-    Defines a link included in the system laydown being simulated. It should consist of the following attributes:
-
-     * **id** [int]: Unique ID for this YAML item
-     * **name** [freetext]: Human-readable name of the component
-     * **bandwidth** [int]: The bandwidth (in bits/s) of the link
-     * **source** [int]: The ID of the source node
-     * **destination** [int]: The ID of the destination node
-
-* **item_type: GREEN_IER**
-
-    Defines a green agent Information Exchange Requirement (IER). It should consist of:
-
-     * **id** [int]: Unique ID for this YAML item
-     * **start_step** [int]: The start step (in the episode) for this IER to begin
-     * **end_step** [int]: The end step (in the episode) for this IER to finish
-     * **load** [int]: The load (in bits/s) for this IER to apply to links
-     * **protocol** [freetext]: The protocol to apply to the links. This must match a value in the services list
-     * **port** [int]: The port that the protocol is running on. This must match a value in the ports list
-     * **source** [int]: The ID of the source node
-     * **destination** [int]: The ID of the destination node
-     * **mission_criticality** [enum]: The mission criticality of this IER (with 5 being highest, 1 lowest)
-
-* **item_type: RED_IER**
-
-    Defines a red agent Information Exchange Requirement (IER). It should consist of:
-
-     * **id** [int]: Unique ID for this YAML item
-     * **start_step** [int]: The start step (in the episode) for this IER to begin
-     * **end_step** [int]: The end step (in the episode) for this IER to finish
-     * **load** [int]: The load (in bits/s) for this IER to apply to links
-     * **protocol** [freetext]: The protocol to apply to the links. This must match a value in the services list
-     * **port** [int]: The port that the protocol is running on. This must match a value in the ports list
-     * **source** [int]: The ID of the source node
-     * **destination** [int]: The ID of the destination node
-     * **mission_criticality** [enum]: Not currently used. Default to 0
-
-* **item_type: GREEN_POL**
-
-    Defines a green agent pattern-of-life instruction. It should consist of:
-
-      * **id** [int]: Unique ID for this YAML item
-      * **start_step** [int]: The start step (in the episode) for this PoL to begin
-      * **end_step** [int]: Not currently used. Default to same as start step
-      * **nodeId** [int]: The ID of the node to apply the PoL to
-      * **type** [enum]: The type of PoL to apply. Can be one of OPERATING, OS or SERVICE
-      * **protocol** [freetext]: The protocol to be affected if SERVICE type is chosen. Must match a value in the services list
-      * **state** [enuum]: The state to apply to the node (which represents the PoL change). Can be one of ON, OFF or RESETTING (for node state) or GOOD, PATCHING or COMPROMISED (for Software State) or GOOD, PATCHING, COMPROMISED or OVERWHELMED (for service state)
-
-* **item_type: RED_POL**
-
-    Defines a red agent pattern-of-life instruction. It should consist of:
-
-      * **id** [int]: Unique ID for this YAML item
-      * **start_step** [int]: The start step (in the episode) for this PoL to begin
-      * **end_step** [int]: Not currently used. Default to same as start step
-      * **targetNodeId** [int]: The ID of the node to apply the PoL to
-      * **initiator** [enum]: What initiates the PoL. Can be DIRECT, IER or SERVICE
-      * **type** [enum]: The type of PoL to apply. Can be one of OPERATING, OS or SERVICE
-      * **protocol** [freetext]: The protocol to be affected if SERVICE type is chosen. Must match a value in the services list
-      * **state** [enum]: The state to apply to the node (which represents the PoL change). Can be one of ON, OFF or RESETTING (for node state) or GOOD, PATCHING or COMPROMISED (for Software State) or GOOD, PATCHING, COMPROMISED or OVERWHELMED (for service state) or GOOD, CORRUPT, DESTROYED, REPAIRING or RESTORING (for file system state)
-      * **sourceNodeId** [int] The ID of the source node containing the service to check (used for SERVICE initiator)
-      * **sourceNodeService** [freetext]: The service on the source node to check (used for SERVICE initiator). Must match a value in the services list for this node
-      * **sourceNodeServiceState** [enum]: The state of the source node service to check (used for SERVICE initiator). Can be one of GOOD, PATCHING, COMPROMISED or OVERWHELMED
-
-* **item_type: ACL_RULE**
-
-    Defines an initial Access Control List (ACL) rule. It should consist of:
-
-      * **id** [int]: Unique ID for this YAML item
-      * **permission** [enum]: Defines either an allow or deny rule. Value must be either DENY or ALLOW
-      * **source** [IP address]: Defines the source IP address for the rule in xxx.xxx.xxx.xxx format
-      * **destination** [IP address]: Defines the destination IP address for the rule in xxx.xxx.xxx.xxx format
-      * **protocol** [freetext]: Defines the protocol for the rule. Must match a value in the services list
-      * **port** [int]: Defines the port for the rule. Must match a value in the ports list
-      * **position** [int]: Defines where to place the ACL rule in the list. Lower index or (higher up in the list) means they are checked first. Index starts at 0 (Python indexes).
+PrimAITE allows for the configuration to be varied each episode. This is done by specifying a configuration folder instead of a single file. A full explanation is provided in the notebook ``Using-Episode-Schedules.ipynb``. Please find the notebook in the user notebooks directory.
--- a/docs/source/configuration/agents.rst
+++ b/docs/source/configuration/agents.rst
@@ -0,0 +1,160 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+
+``agents``
+==========
+Agents can be scripted (deterministic and stochastic), or controlled by a reinforcement learning algorithm. Not to be confused with an RL agent, the term agent here is used to refer to an entity that sends requests to the simulated network. In this part of the config, each agent's action space, observation space, and reward function can be defined. All three are defined in a modular way.
+
+``agents`` hierarchy
+--------------------
+
+.. code-block:: yaml
+
+    agents:
+    - ref: red_agent_example
+      ...
+    - ref: blue_agent_example
+      ...
+    - ref: green_agent_example
+      team: GREEN
+      type: probabilistic-agent
+
+      agent_settings:
+        start_step: 5
+        frequency: 4
+        variance: 3
+        flatten_obs: False
+
+``ref``
+-------
+The reference to be used for the given agent.
+
+``team``
+--------
+Specifies if the agent is malicious (``RED``), benign (``GREEN``), or defensive (``BLUE``). Currently this value is not used for anything other than for human readability in the configuration file.
+
+``type``
+--------
+Specifies which class should be used for the agent. ``proxy-agent`` is used for agents that receive instructions from an RL algorithm. Scripted agents like ``red-database-corrupting-agent`` and ``probabilistic-agent`` generate their own behaviour.
+
+Available agent types:
+
+- ``probabilistic-agent``
+- ``proxy-agent``
+- ``red-database-corrupting-agent``
+
+``observation_space``
+---------------------
+Defines the observation space of the agent.
+
+``type``
+^^^^^^^^
+
+Selects which Python class from the :py:mod:`primaite.game.agent.observations` module is used for the overall observation structure.
+
+``options``
+^^^^^^^^^^^
+
+Allows configuration of the chosen observation type. These are optional.
+
+    * ``num_services_per_node``, ``num_folders_per_node``, ``num_files_per_folder``, ``num_nics_per_node`` all define the shape of the observation space. The size and shape of the obs space must remain constant, but the number of files, folders, acl rules, and other components can change within an episode. Therefore padding is performed and these options set the size of the obs space.
+    * ``nodes``: list of nodes that will be present in this agent's observation space. The ``node_ref`` relates to the human-readable unique reference defined later in the ``simulation`` part of the config. Each node can also be configured with services, and files that should be monitored.
+    * ``links``: list of links that will be present in this agent's observation space. The ``link_ref`` relates to the human-readable unique reference defined later in the ``simulation`` part of the config.
+    * ``acl``: configure how the agent reads the access control list on the router in the simulation. ``router_node_ref`` is for selecting which router's acl table should be used. ``ip_list`` sets the encoding of ip addresses as integers within the observation space.
+
+For more information see :py:mod:`primaite.game.agent.observations`
+
+``action_space``
+----------------
+
+The action space is configured to be made up of individual action types. Once configured, the agent can select an action type and some optional action parameters at every step. For example: The ``NODE_SERVICE_SCAN`` action takes the parameters ``node_id`` and ``service_id``.
+
+
+``action_map``
+^^^^^^^^^^^^^^
+
+Restricts the possible combinations of action type / action parameter values to reduce the overall size of the action space. By default, every possible combination of actions and parameters will be assigned an integer for the agent's ``MultiDiscrete`` action space. Instead, the ``action_map`` allows you to list the actions corresponding to each integer in the ``MultiDiscrete`` space.
+
+This is optional.
+
+``options``
+^^^^^^^^^^^
+
+Options that apply to all action components. These are optional.
+
+    * ``nodes``: list the nodes that the agent can act on, the order of this list defines the mapping between nodes and ``node_id`` integers.
+    * ``max_folders_per_node``, ``max_files_per_folder``, ``max_services_per_node``, ``max_nics_per_node``, ``max_acl_rules`` all are used to define the size of the action space.
+
+For more information see :py:mod:`primaite.game.agent.actions`
+
+``reward_function``
+-------------------
+
+Similar to action space, this is defined as a list of components from the :py:mod:`primaite.game.agent.rewards` module.
+
+``reward_components``
+^^^^^^^^^^^^^^^^^^^^^
+A list of available reward types from :py:mod:`primaite.game.agent.rewards.RewardFunction.rew_class_identifiers`
+
+e.g.
+
+.. code-block:: yaml
+
+    reward_components:
+        - type: dummy
+          weight: 1.0
+        - type: database-file-integrity
+          weight: 0.40
+          options:
+            node_hostname: database_server
+            folder_name: database
+            file_name: database.db
+
+
+``agent_settings``
+------------------
+
+Settings passed to the agent during initialisation. Determines how the agent will behave during training.
+
+e.g.
+
+.. code-block:: yaml
+
+    agent_settings:
+        start_step: 25
+        frequency: 20
+        variance: 5
+
+``start_step``
+^^^^^^^^^^^^^^
+
+Optional. Default value is ``5``.
+
+The timestep where the agent begins performing actions.
+
+``frequency``
+^^^^^^^^^^^^^
+
+Optional. Default value is ``5``.
+
+The number of timesteps the agent will wait before performing another action.
+
+``variance``
+^^^^^^^^^^^^
+
+Optional. Default value is ``0``.
+
+The number of timesteps by which the frequency can randomly vary.
+
+``flatten_obs``
+---------------
+
+If ``True``, gymnasium flattening will be performed on the observation space before sending to the agent. Set this to ``True`` if your agent does not support nested observation spaces.
+
+``Agent History``
+-----------------
+
+Agents will record their action log for each step. This is a summary of what the agent did, along with response information from requests within the simulation.
+A summary of the actions taken by the agent can be viewed using the ``show_history()`` function. By default, this will display all actions taken apart from ``do-nothing``.
--- a/docs/source/configuration/game.rst
+++ b/docs/source/configuration/game.rst
@@ -0,0 +1,62 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+
+``game``
+========
+This section defines high-level settings that apply across the game. Currently it is used to help shape the action and observation spaces by restricting which ports and internet protocols should be considered. Here, users can also set the maximum number of steps in an episode.
+
+``game`` hierarchy
+------------------
+
+.. code-block:: yaml
+
+    game:
+        max_episode_length: 256
+        ports:
+            - ARP
+            - DNS
+            - HTTP
+            - POSTGRES_SERVER
+        protocols:
+            - ICMP
+            - TCP
+            - UDP
+        thresholds:
+            nmne:
+                high: 10
+                medium: 5
+                low: 0
+        seed: 1
+
+``max_episode_length``
+----------------------
+
+Optional. Default value is ``256``.
+
+The maximum number of steps (timesteps) in a single episode.
+
+``ports``
+---------
+
+A list of ports that the Reinforcement Learning agent(s) are able to see in the observation space.
+
+See :py:const:`primaite.utils.validation.port.PORT_LOOKUP` for a list of ports.
+
+``protocols``
+-------------
+
+A list of protocols that the Reinforcement Learning agent(s) are able to see in the observation space.
+
+See :py:const:`primaite.utils.validation.ip_protocol.PROTOCOL_LOOKUP` for a list of protocols.
+
+``thresholds``
+--------------
+
+These are used to determine the thresholds of high, medium and low categories for counted observation occurrences.
+
+``seed``
+--------
+
+Used to configure the random seeds used within PrimAITE, ensuring determinism within episode/session runs. If empty or set to -1, no seed is set. The given seed value is logged (by default) in ``primaite/<VERSION>/sessions/<DATE>/<TIME>/simulation_output``.
--- a/docs/source/configuration/io_settings.rst
+++ b/docs/source/configuration/io_settings.rst
@@ -0,0 +1,103 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+.. _io_settings:
+
+``io_settings``
+===============
+This section configures how PrimAITE saves data during simulation and training.
+
+``io_settings`` hierarchy
+-------------------------
+
+.. code-block:: yaml
+
+    io_settings:
+        save_agent_actions: True
+        save_step_metadata: False
+        save_pcap_logs: False
+        save_sys_logs: False
+        save_agent_logs: False
+        write_sys_log_to_terminal: False
+        write_agent_log_to_terminal: False
+        sys_log_level: WARNING
+        agent_log_level: INFO
+
+
+``save_agent_actions``
+----------------------
+
+Optional. Default value is ``True``.
+
+If ``True``, this will create a JSON file each episode detailing every agent's action in each step of that episode, formatted according to the CAOS format. This includes scripted, RL, and red agents.
+
+``save_step_metadata``
+----------------------
+
+Optional. Default value is ``False``.
+
+If ``True``, the RL agents' actions, environment states and other data will be saved at every single step.
+
+
+``save_pcap_logs``
+------------------
+
+Optional. Default value is ``False``.
+
+If ``True``, then the pcap files which contain all network traffic during the simulation will be saved.
+
+
+``save_sys_logs``
+-----------------
+
+Optional. Default value is ``False``.
+
+If ``True``, then the log files which contain all node actions during the simulation will be saved.
+
+``save_agent_logs``
+-------------------
+
+Optional. Default value is ``False``.
+
+If ``True``, then the log files which contain all human readable agent behaviour during the simulation will be saved.
+
+``write_sys_log_to_terminal``
+-----------------------------
+
+Optional. Default value is ``False``.
+
+If ``True``, PrimAITE will print sys log to the terminal.
+
+``write_agent_log_to_terminal``
+-------------------------------
+
+Optional. Default value is ``False``.
+
+If ``True``, PrimAITE will print all human readable agent behaviour logs to the terminal.
+
+
+``sys_log_level & agent_log_level``
+-----------------------------------
+
+Optional. Default value is ``WARNING``.
+
+The level of logging that should be visible in the syslog, agent logs or the logs output to the terminal.
+
+``save_sys_logs`` or ``write_sys_log_to_terminal`` has to be set to ``True`` for this setting to be used.
+
+This is also true for agent behaviour logging.
+
+Available options are:
+
+- ``DEBUG``: Debug level items and the items below
+- ``INFO``: Info level items and the items below
+- ``WARNING``: Warning level items and the items below
+- ``ERROR``: Error level items and the items below
+- ``CRITICAL``: Only critical level logs
+
+See also |logging_levels|
+
+.. |logging_levels| raw:: html
+
+    <a href="https://docs.python.org/3/library/logging.html#logging-levels" target="_blank">Python logging levels</a>
--- a/docs/source/configuration/simulation.rst
+++ b/docs/source/configuration/simulation.rst
@@ -0,0 +1,125 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+
+``simulation``
+==============
+In this section the network layout is defined. This part of the config follows a hierarchical structure.
+At the top level of the network are ``nodes``, ``links`` and ``airspace``.
+
+e.g.
+
+.. code-block:: yaml
+
+    simulation:
+        network:
+            nodes:
+            ...
+            links:
+            ...
+            airspace:
+            ...
+
+
+``nodes``
+---------
+
+This is where the list of nodes is defined. Some items will differ according to the node type; however, there will be common items such as a node's hostname (which is used by the agent) and the node's ``type``.
+
+To see the configuration for these nodes, refer to the following:
+
+.. toctree::
+    :maxdepth: 1
+    :glob:
+
+    simulation/nodes/computer
+    simulation/nodes/firewall
+    simulation/nodes/router
+    simulation/nodes/server
+    simulation/nodes/switch
+    simulation/nodes/wireless_router
+    simulation/nodes/network_examples
+
+``links``
+---------
+
+This is where the links between the nodes are formed.
+
+e.g.
+
+In order to recreate the network below, we will need to create 2 links:
+
+- a link from computer_1 to the switch
+- a link from computer_2 to the switch
+
+.. image:: ../../_static/switched_p2p_network.png
+    :width: 500
+    :align: center
+
+this results in:
+
+.. code-block:: yaml
+
+    links:
+        - endpoint_a_hostname: computer_1
+          endpoint_a_port: 1 # port 1 on computer_1
+          endpoint_b_hostname: switch
+          endpoint_b_port: 1 # port 1 on switch
+          bandwidth: 100
+        - endpoint_a_hostname: computer_2
+          endpoint_a_port: 1 # port 1 on computer_2
+          endpoint_b_hostname: switch
+          endpoint_b_port: 2 # port 2 on switch
+          bandwidth: 100
+
+
+``endpoint_a_hostname``
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``hostname`` of the node which must be connected.
+
+``endpoint_a_port``
+^^^^^^^^^^^^^^^^^^^
+
+The port on ``endpoint_a_hostname`` which is to be connected to ``endpoint_b_port``.
+This accepts an integer value e.g. if port 1 is to be connected, the configuration should be ``endpoint_a_port: 1``
+
+``endpoint_b_hostname``
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``hostname`` of the node which must be connected.
+
+``endpoint_b_port``
+^^^^^^^^^^^^^^^^^^^
+
+The port on ``endpoint_b_hostname`` which is to be connected to ``endpoint_a_port``.
+This accepts an integer value e.g. if port 1 is to be connected, the configuration should be ``endpoint_b_port: 1``
+
+``bandwidth``
+^^^^^^^^^^^^^
+
+This is an integer value specifying the allowed bandwidth across the connection. Units are in Mbps.
+
+``airspace``
+------------
+
+This section configures settings specific to the wireless network's virtual airspace.
+
+``frequency_max_capacity_mbps``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This setting allows the user to override the default maximum bandwidth capacity set for each frequency. The key should
+be the AirSpaceFrequency name and the value be the desired maximum bandwidth capacity in mbps (megabits per second) for
+a single timestep.
+
+The example below would permit 123.45 megabits to be transmitted across the WiFi 2.4 GHz frequency in a single timestep.
+Setting a frequency's max capacity to 0.0 blocks that frequency on the airspace.
+
+.. code-block:: yaml
+
+    simulation:
+      network:
+        airspace:
+          frequency_max_capacity_mbps:
+            WIFI_2_4: 123.45
+            WIFI_5: 0.0
--- a/docs/source/configuration/simulation/nodes/common/common.rst
+++ b/docs/source/configuration/simulation/nodes/common/common.rst
@@ -0,0 +1,35 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+.. _Node Attributes:
+
+Common Attributes
+#################
+
+Node Attributes
+===============
+
+Attributes that are shared by all nodes.
+
+.. include:: common_node_attributes.rst
+
+.. _Network Node Attributes:
+
+Network Node Attributes
+=======================
+
+Attributes that are shared by nodes that inherit from :py:mod:`primaite.simulator.network.hardware.nodes.network.network_node.NetworkNode`
+
+.. include:: common_network_node_attributes.rst
+
+.. _Host Node Attributes:
+
+Host Node Attributes
+====================
+
+Attributes that are shared by nodes that inherit from :py:mod:`primaite.simulator.network.hardware.nodes.host.host_node.HostNode`
+
+.. include:: common_host_node_attributes.rst
+
+.. |NODE| replace:: node
--- a/docs/source/configuration/simulation/nodes/common/common_host_node_attributes.rst
+++ b/docs/source/configuration/simulation/nodes/common/common_host_node_attributes.rst
@@ -0,0 +1,26 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+.. _common_host_node_attributes:
+
+``ip_address``
+--------------
+
+The IP address of the |NODE| in the network.
+
+``subnet_mask``
+---------------
+
+Optional. Default value is ``255.255.255.0``.
+
+The subnet mask for the |NODE| to use.
+
+``default_gateway``
+-------------------
+
+The IP address that the |NODE| will use as the default gateway. Typically, this is the IP address of the closest router that the |NODE| is connected to.
+
+.. include:: ../software/applications.rst
+
+.. include:: ../software/services.rst
--- a/docs/source/configuration/simulation/nodes/common/common_network_node_attributes.rst
+++ b/docs/source/configuration/simulation/nodes/common/common_network_node_attributes.rst
@@ -0,0 +1,51 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+.. _common_network_node_attributes:
+
+``routes``
+----------
+
+A list of routes which tells the |NODE| where to forward the packet to depending on the target IP address.
+
+e.g.
+
+.. code-block:: yaml
+
+    nodes:
+        - ref: node
+          ...
+          routes:
+            - address: 192.168.0.10
+              subnet_mask: 255.255.255.0
+              next_hop_ip_address: 192.168.1.1
+              metric: 0
+
+``address``
+"""""""""""
+
+The target IP address for the route. If the packet destination IP address matches this, the |NODE| will route the packet according to the ``next_hop_ip_address``.
+
+This must be a valid IPv4 address, i.e. in the range ``0.0.0.0`` to ``255.255.255.255``.
+
+``subnet_mask``
+"""""""""""""""
+
+Optional. Default value is ``255.255.255.0``.
+
+The subnet mask setting for the route.
+
+``next_hop_ip_address``
+"""""""""""""""""""""""
+
+The IP address of the next hop that the packet will be forwarded to if the route's ``address`` matches the packet's destination IP address.
+
+This must be a valid IPv4 address, i.e. in the range ``0.0.0.0`` to ``255.255.255.255``.
+
+``metric``
+""""""""""
+
+Optional. Default value is ``0``. This value accepts floats.
+
+The cost or distance of a route. The higher the value, the more cost or distance is attributed to the route.
--- a/docs/source/configuration/simulation/nodes/common/common_node_attributes.rst
+++ b/docs/source/configuration/simulation/nodes/common/common_node_attributes.rst
@@ -0,0 +1,107 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+.. _common_node_attributes:
+
+``hostname``
+------------
+
+The hostname of the |NODE|. This will be used to reference the |NODE|.
+
+``operating_state``
+-------------------
+
+The initial operating state of the node.
+
+Optional. Default value is ``ON``.
+
+Options available are:
+
+- ``ON``
+- ``OFF``
+- ``BOOTING``
+- ``SHUTTING_DOWN``
+
+Note that YAML may assume non quoted ``ON`` and ``OFF`` as ``True`` and ``False`` respectively. To prevent this, use ``"ON"`` or ``"OFF"``
+
+See :py:mod:`primaite.simulator.network.hardware.node_operating_state.NodeOperatingState`
+
+
+``dns_server``
+--------------
+
+Optional. Default value is ``None``.
+
+The IP address of the node which holds an instance of the :ref:`DNSServer`. Some applications may use a domain name e.g. the :ref:`WebBrowser`
+
+``start_up_duration``
+---------------------
+
+Optional. Default value is ``3``.
+
+The number of time steps required to occur in order for the node to cycle from ``OFF`` to ``BOOTING`` and then finally ``ON``.
+
+``shut_down_duration``
+----------------------
+
+Optional. Default value is ``3``.
+
+The number of time steps required to occur in order for the node to cycle from ``ON`` to ``SHUTTING_DOWN`` and then finally ``OFF``.
+
+``file_system``
+---------------
+
+Optional.
+
+The file system of the node. This configuration allows nodes to be initialised with files and/or folders.
+
+The file system takes a list of folders and files.
+
+Example:
+
+.. code-block:: yaml
+
+    simulation:
+      network:
+        nodes:
+        - hostname: client_1
+          type: computer
+          ip_address: 192.168.10.11
+          subnet_mask: 255.255.255.0
+          default_gateway: 192.168.10.1
+          file_system:
+            - empty_folder  # example of an empty folder
+            - downloads:
+              - "test_1.txt"  # files in the downloads folder
+              - "test_2.txt"
+            - root:
+              - passwords:  # example of file with size and type
+                  size: 69  # size in bytes
+                  type: TXT  # See FileType for list of available file types
+
+List of file types: :py:mod:`primaite.simulator.file_system.file_type.FileType`
+
+``users``
+---------
+
+The list of pre-existing users that are additional to the default admin user (``username=admin``, ``password=admin``).
+Additional users are configured as an array and must contain a ``username``, ``password``, and can contain an optional
+boolean ``is_admin``.
+
+Example of adding two additional users to a node:
+
+.. code-block:: yaml
+
+    simulation:
+      network:
+        nodes:
+        - hostname: [hostname]
+          type: [Node Type]
+          users:
+            - username: jane.doe
+              password: '1234'
+              is_admin: true
+            - username: john.doe
+              password: password_1
+              is_admin: false
--- a/docs/source/configuration/simulation/nodes/common/node_type_list.rst
+++ b/docs/source/configuration/simulation/nodes/common/node_type_list.rst
@@ -0,0 +1,19 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+``type``
+--------
+
+The type of node to add.
+
+Available options are:
+
+- ``computer``
+- ``firewall``
+- ``router``
+- ``wireless_router``
+- ``server``
+- ``switch``
+
+To create a |NODE|, type must be |NODE_TYPE|.
--- a/docs/source/configuration/simulation/nodes/computer.rst
+++ b/docs/source/configuration/simulation/nodes/computer.rst
@@ -0,0 +1,40 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+.. _computer_configuration:
+
+``computer``
+============
+
+A basic representation of a computer within the simulation.
+
+See :py:mod:`primaite.simulator.network.hardware.nodes.host.computer.Computer`
+
+example computer
+----------------
+
+.. code-block:: yaml
+
+    simulation:
+      network:
+        nodes:
+        - hostname: client_1
+          type: computer
+          ip_address: 192.168.0.10
+          subnet_mask: 255.255.255.0
+          default_gateway: 192.168.0.1
+          dns_server: 192.168.1.10
+          applications:
+            ...
+          services:
+            ...
+
+.. include:: common/common_node_attributes.rst
+
+.. include:: common/node_type_list.rst
+
+.. include:: common/common_host_node_attributes.rst
+
+.. |NODE| replace:: computer
+.. |NODE_TYPE| replace:: ``computer``
--- a/docs/source/configuration/simulation/nodes/firewall.rst
+++ b/docs/source/configuration/simulation/nodes/firewall.rst
@@ -0,0 +1,297 @@
+.. only:: comment
+
+    © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
+
+.. _firewall_configuration:
+
+``firewall``
+============
+
+A basic representation of a network firewall within the simulation.
+
+The firewall works similarly to the :ref:`Router <router_configuration>`, the differences being that the firewall has specific ACL rules for inbound and outbound traffic and is limited to 3 ports.
+
+See :py:mod:`primaite.simulator.network.hardware.nodes.network.firewall.Firewall`
+
+example firewall
+----------------
+
+.. code-block:: yaml
+
+    simulation:
+      network:
+        nodes:
+          - hostname: firewall
+            type: firewall
+            ports:
+              external_port: # port 1
+                ip_address: 192.168.20.1
+                subnet_mask: 255.255.255.0
+              internal_port: # port 2
+                ip_address: 192.168.1.2
+                subnet_mask: 255.255.255.0
+              dmz_port: # port 3
+                ip_address: 192.168.10.1
+                subnet_mask: 255.255.255.0
+            acl:
+              internal_inbound_acl:
+                ...
+              internal_outbound_acl:
+                ...
+              dmz_inbound_acl:
+                ...
+              dmz_outbound_acl:
+                ...
+              external_inbound_acl:
+                ...
+              external_outbound_acl:
+                ...
+            routes:
+                ...
+
+.. include:: common/common_node_attributes.rst
+
+.. include:: common/node_type_list.rst
+
+``ports``
+---------
+
+The firewall node only has 3 ports. These specifically are:
+
+- ``external_port`` (port 1)
+- ``internal_port`` (port 2)
+- ``dmz_port`` (port 3) (can be optional)
+
+The ports should be defined with an ip address and subnet mask e.g.
+
+.. code-block:: yaml
+
+    nodes:
+      - hostname: firewall
+        ...
+        ports:
+          external_port: # port 1
+            ip_address: 192.168.20.1
+            subnet_mask: 255.255.255.0
+          internal_port: # port 2
+            ip_address: 192.168.1.2
+            subnet_mask: 255.255.255.0
+          dmz_port: # port 3
+            ip_address: 192.168.10.1
+            subnet_mask: 255.255.255.0
+
+``ip_address``
+""""""""""""""
+
+The IP address for the given port. This must be a valid IPv4 address, i.e. in the range ``0.0.0.0`` to ``255.255.255.255``.
+
+``subnet_mask``
+"""""""""""""""
+
+Optional. Default value is ``255.255.255.0``.
+
+The subnet mask setting for the port.
+
+``acl``
+-------
+
+There are 6 ACLs that can be defined for a firewall
+
+- ``internal_inbound_acl`` for traffic going towards the internal network
+- ``internal_outbound_acl`` for traffic coming from the internal network
+- ``dmz_inbound_acl`` for traffic going towards the dmz network
+- ``dmz_outbound_acl`` for traffic coming from the dmz network
+- ``external_inbound_acl`` for traffic coming from the external network
+- ``external_outbound_acl`` for traffic going towards the external network
+
+.. image:: ../../../../_static/firewall_acl.png
+    :width: 500
+    :align: center
+
+By default, ``external_inbound_acl`` and ``external_outbound_acl`` will permit any traffic through.
+
+``internal_inbound_acl``, ``internal_outbound_acl``, ``dmz_inbound_acl`` and ``dmz_outbound_acl`` will deny any traffic by default, so must be configured to allow defined ``src_port`` and ``dst_port`` or ``protocol``.
+
+See :py:mod:`primaite.simulator.network.hardware.nodes.network.router.AccessControlList`
+
+See :ref:`List of Ports <List of Ports>` for a list of ports.
+
+``internal_inbound_acl``
+""""""""""""""""""""""""
+
+ACL rules for packets that have a destination IP address in what is considered the internal network.
+
+example:
+
+.. code-block:: yaml
+
+    nodes:
+      - hostname: firewall
+        ...
+        acl:
+          internal_inbound_acl:
+            21: # position 21 on ACL list
+              action: PERMIT  # allow packets that
+              src_port: POSTGRES_SERVER   # are emitted from the POSTGRES_SERVER port
+              dst_port: POSTGRES_SERVER   # are going towards an POSTGRES_SERVER port
+            22: # position 22 on ACL list
+              action: PERMIT  # allow packets that
+              src_port: ARP   # are emitted from the ARP port
+              dst_port: ARP   # are going towards an ARP port
+            23: # position 23 on ACL list
+              action: PERMIT  # allow packets that
+              protocol: ICMP  # are ICMP
+
+``internal_outbound_acl``
+"""""""""""""""""""""""""
+
+ACL rules for packets that have a source IP address in what is considered the internal network and is going towards the DMZ network or the external network.
+
+example:
+
+.. code-block:: yaml
+
+    nodes:
+      - hostname: firewall
+        ...
+        acl:
+          internal_outbound_acl:
+            21: # position 21 on ACL list
+              action: PERMIT  # allow packets that
+              src_port: POSTGRES_SERVER   # are emitted from the POSTGRES_SERVER port
+              dst_port: POSTGRES_SERVER   # are going towards an POSTGRES_SERVER port
+            22: # position 22 on ACL list
+              action: PERMIT  # allow packets that
+              src_port: ARP   # are emitted from the ARP port
+              dst_port: ARP   # are going towards an ARP port
+            23: # position 23 on ACL list
+              action: PERMIT  # allow packets that
+              protocol: ICMP  # are ICMP
+
+
+``dmz_inbound_acl``
+"""""""""""""""""""
+
+ACL rules for packets that have a destination IP address in what is considered the DMZ network.
+
+example:
+
+.. code-block:: yaml
+
+    nodes:
+        - hostname: firewall
+          ...
+          acl:
+            dmz_inbound_acl:
+                19: # position 19 on ACL list
+                    action: PERMIT  # allow packets that
+                    src_port: POSTGRES_SERVER   # are emitted from the POSTGRES_SERVER port
+                    dst_port: POSTGRES_SERVER   # are going towards an POSTGRES_SERVER port
+                20: # position 20 on ACL list
+                    action: PERMIT  # allow packets that
+                    src_port: HTTP   # are emitted from the HTTP port
+                    dst_port: HTTP   # are going towards an HTTP port
+                21: # position 21 on ACL list
+                    action: PERMIT  # allow packets that
+                    src_port: HTTPS   # are emitted from the HTTPS port
+                    dst_port: HTTPS   # are going towards an HTTPS port
+                22: # position 22 on ACL list
+                    action: PERMIT  # allow packets that
+                    src_port: ARP   # are emitted from the ARP port
+                    dst_port: ARP   # are going towards an ARP port
+                23: # position 23 on ACL list
+                    action: PERMIT  # allow packets that
+                    protocol: ICMP  # are ICMP
+
+``dmz_outbound_acl``
+""""""""""""""""""""
+
+ACL rules for packets that have a source IP address in what is considered the DMZ network and is going towards the internal network or the external network.
+
+example:
+
+.. code-block:: yaml
+
+    nodes:
+      - hostname: firewall
+        ...
+        acl:
+          dmz_outbound_acl:
+            19: # position 19 on ACL list
+              action: PERMIT  # allow packets that
+              src_port: POSTGRES_SERVER   # are emitted from the POSTGRES_SERVER port
+              dst_port: POSTGRES_SERVER   # are going towards an POSTGRES_SERVER port
+            20: # position 20 on ACL list
+              action: PERMIT  # allow packets that
+              src_port: HTTP   # are emitted from the HTTP port
+              dst_port: HTTP   # are going towards an HTTP port
+            21: # position 21 on ACL list
+              action: PERMIT  # allow packets that
+              src_port: HTTPS   # are emitted from the HTTPS port
+              dst_port: HTTPS   # are going towards an HTTPS port
+            22: # position 22 on ACL list
+              action: PERMIT  # allow packets that
+              src_port: ARP   # are emitted from the ARP port
+              dst_port: ARP   # are going towards an ARP port
+            23: # position 23 on ACL list
+              action: PERMIT  # allow packets that
+              protocol: ICMP  # are ICMP
+
+
+
+``external_inbound_acl``
+""""""""""""""""""""""""
+
+Optional. By default, this will allow any traffic through.
+
+ACL rules for packets that have a source IP address in what is considered the external network and are going towards the DMZ network or the internal network.
+
+example:
+
+.. code-block:: yaml
+
+    nodes:
+      - hostname: firewall
+        ...
+        acl:
+          external_inbound_acl:
+            21: # position 21 on ACL list
+              action: DENY  # deny packets that
+              src_port: POSTGRES_SERVER   # are emitted from the POSTGRES_SERVER port
+              dst_port: POSTGRES_SERVER   # are going towards an POSTGRES_SERVER port
+            22: # position 22 on ACL list
+              action: PERMIT  # allow packets that
+              src_port: ARP   # are emitted from the ARP port
+              dst_port: ARP   # are going towards an ARP port
+            23: # position 23 on ACL list
+              action: PERMIT  # allow packets that
+              protocol: ICMP  # are ICMP
+
+``external_outbound_acl``
+"""""""""""""""""""""""""
+
+Optional. By default, this will allow any traffic through.
+
+ACL rules for packets that have a destination IP address in what is considered the external network.
+
+example:
+
+.. code-block:: yaml
+
+    nodes:
+      - hostname: firewall
+        ...
+        acl:
+          external_outbound_acl:
+            22: # position 22 on ACL list
+              action: PERMIT  # allow packets that
+              src_port: ARP   # are emitted from the ARP port
+              dst_port: ARP   # are going towards an ARP port
+            23: # position 23 on ACL list
+              action: PERMIT  # allow packets that
+              protocol: ICMP  # are ICMP
+
+.. include:: common/common_network_node_attributes.rst
+
+.. |NODE| replace:: firewall
+.. |NODE_TYPE| replace:: ``firewall``
--- a/docs/source/configuration/simulation/nodes/images/primaite_example_basic_lan_network_dark.png
+++ b/docs/source/configuration/simulation/nodes/images/primaite_example_basic_lan_network_dark.png
--- a/Show More
+++ b/Show More