From 4371ca13fc0b655aeb2d4d20b365ef400f5db68b Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Thu, 6 Jul 2023 11:12:51 +0100 Subject: [PATCH 01/32] 1566 - added train_episodes, train_steps, eval_episodes and eval_steps to training_config_main.yaml --- .../training/training_config_main.yaml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/primaite/config/_package_data/training/training_config_main.yaml b/src/primaite/config/_package_data/training/training_config_main.yaml index a638fe14..dc94e3bb 100644 --- a/src/primaite/config/_package_data/training/training_config_main.yaml +++ b/src/primaite/config/_package_data/training/training_config_main.yaml @@ -49,11 +49,19 @@ observation_space: - name: NODE_LINK_TABLE # - name: NODE_STATUSES # - name: LINK_TRAFFIC_LEVELS -# Number of episodes to run per session -num_episodes: 10 -# Number of time_steps per episode -num_steps: 256 + +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Sets how often the agent will save a checkpoint (every n time episodes). # Set to 0 if no checkpoints are required. 
Default is 10 From e03c29b921705e127b6cd1d844af9df4927950f7 Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Fri, 7 Jul 2023 14:13:47 +0100 Subject: [PATCH 02/32] 1566 - added test file and edited configs to include types of num steps and modifed agents to use correct step and episode counts --- src/primaite/agents/agent.py | 4 +- src/primaite/agents/rllib.py | 10 +- src/primaite/agents/sb3.py | 11 +- src/primaite/config/training_config.py | 30 +++- src/primaite/environment/primaite_env.py | 8 +- .../new_training_config.yaml | 15 +- .../main_config_LINK_TRAFFIC_LEVELS.yaml | 15 +- .../main_config_NODE_LINK_TABLE.yaml | 15 +- .../obs_tests/main_config_NODE_STATUSES.yaml | 15 +- .../obs_tests/main_config_without_obs.yaml | 15 +- .../one_node_states_on_off_main_config.yaml | 15 +- ..._space_fixed_blue_actions_main_config.yaml | 15 +- .../single_action_space_main_config.yaml | 15 +- tests/config/test_random_red_main_config.yaml | 15 +- tests/config/train_episode_step.yaml | 153 ++++++++++++++++++ .../config/train_eval_check_episode_step.yaml | 153 ++++++++++++++++++ tests/config/training_config_main.yaml | 153 ++++++++++++++++++ tests/conftest.py | 5 +- tests/test_single_action_space.py | 4 +- tests/test_train_eval_episode_steps.py | 46 ++++++ 20 files changed, 652 insertions(+), 60 deletions(-) create mode 100644 tests/config/train_episode_step.yaml create mode 100644 tests/config/train_eval_check_episode_step.yaml create mode 100644 tests/config/training_config_main.yaml create mode 100644 tests/test_train_eval_episode_steps.py diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py index 685fe776..4eb398b4 100644 --- a/src/primaite/agents/agent.py +++ b/src/primaite/agents/agent.py @@ -348,8 +348,8 @@ class HardCodedAgentSessionABC(AgentSessionABC): self._env.set_as_eval() # noqa self.is_eval = True - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_eval_steps + 
episodes = self._training_config.num_eval_episodes obs = self._env.reset() for episode in range(episodes): diff --git a/src/primaite/agents/rllib.py b/src/primaite/agents/rllib.py index d851ba9c..443598e7 100644 --- a/src/primaite/agents/rllib.py +++ b/src/primaite/agents/rllib.py @@ -107,13 +107,13 @@ class RLlibAgent(AgentSessionABC): ), ) - self._agent_config.training(train_batch_size=self._training_config.num_steps) + self._agent_config.training(train_batch_size=self._training_config.num_train_steps) self._agent_config.framework(framework="tf") self._agent_config.rollouts( num_rollout_workers=1, num_envs_per_worker=1, - horizon=self._training_config.num_steps, + horizon=self._training_config.num_train_steps, ) self._agent: Algorithm = self._agent_config.build(logger_creator=_custom_log_creator(self.learning_path)) @@ -121,7 +121,7 @@ class RLlibAgent(AgentSessionABC): checkpoint_n = self._training_config.checkpoint_every_n_episodes episode_count = self._current_result["episodes_total"] if checkpoint_n > 0 and episode_count > 0: - if (episode_count % checkpoint_n == 0) or (episode_count == self._training_config.num_episodes): + if (episode_count % checkpoint_n == 0) or (episode_count == self._training_config.num_train_episodes): self._agent.save(str(self.checkpoints_path)) def learn( @@ -133,8 +133,8 @@ class RLlibAgent(AgentSessionABC): :param kwargs: Any agent-specific key-word args to be passed. 
""" - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_train_steps + episodes = self._training_config.num_train_episodes _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...") for i in range(episodes): diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py index f5ac44cb..17827ff4 100644 --- a/src/primaite/agents/sb3.py +++ b/src/primaite/agents/sb3.py @@ -53,11 +53,12 @@ class SB3Agent(AgentSessionABC): session_path=self.session_path, timestamp_str=self.timestamp_str, ) + self._agent = self._agent_class( PPOMlp, self._env, verbose=self.sb3_output_verbose_level, - n_steps=self._training_config.num_steps, + n_steps=self._training_config.num_eval_steps, tensorboard_log=str(self._tensorboard_log_path), ) @@ -82,8 +83,8 @@ class SB3Agent(AgentSessionABC): :param kwargs: Any agent-specific key-word args to be passed. """ - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_train_steps + episodes = self._training_config.num_train_episodes self.is_eval = False _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...") for i in range(episodes): @@ -104,8 +105,8 @@ class SB3Agent(AgentSessionABC): :param deterministic: Whether the evaluation is deterministic. :param kwargs: Any agent-specific key-word args to be passed. 
""" - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_eval_steps + episodes = self._training_config.num_eval_episodes self._env.set_as_eval() self.is_eval = True if deterministic: diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py index bd73f65b..018fd982 100644 --- a/src/primaite/config/training_config.py +++ b/src/primaite/config/training_config.py @@ -60,11 +60,17 @@ class TrainingConfig: action_type: ActionType = ActionType.ANY "The ActionType to use" - num_episodes: int = 10 - "The number of episodes to train over" + num_train_episodes: int = 10 + "The number of episodes to train over during an training session" - num_steps: int = 256 - "The number of steps in an episode" + num_train_steps: int = 256 + "The number of steps in an episode during an training session" + + num_eval_episodes: int = 10 + "The number of episodes to train over during an evaluation session" + + num_eval_steps: int = 256 + "The number of steps in an episode during an evaluation session" checkpoint_every_n_episodes: int = 5 "The agent will save a checkpoint every n episodes" @@ -230,8 +236,17 @@ class TrainingConfig: tc += f"{self.hard_coded_agent_view}, " tc += f"{self.action_type}, " tc += f"observation_space={self.observation_space}, " - tc += f"{self.num_episodes} episodes @ " - tc += f"{self.num_steps} steps" + if self.session_type.name == "TRAIN": + tc += f"{self.num_train_episodes} episodes @ " + tc += f"{self.num_train_steps} steps" + elif self.session_type.name == "EVAL": + tc += f"{self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" + else: + tc += f"Training: {self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" + tc += f"Evaluation: {self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" return tc @@ -320,7 +335,8 @@ def _get_new_key_from_legacy(legacy_key: str) -> str: """ 
key_mapping = { "agentIdentifier": None, - "numEpisodes": "num_episodes", + "numEpisodes": "num_train_episodes", + "numSteps": "num_train_steps", "timeDelay": "time_delay", "configFilename": None, "sessionType": "session_type", diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index 03c23f93..c7e67e34 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -85,7 +85,12 @@ class Primaite(Env): _LOGGER.info(f"Using: {str(self.training_config)}") # Number of steps in an episode - self.episode_steps = self.training_config.num_steps + if self.training_config.session_type == SessionType.TRAIN: + self.episode_steps = self.training_config.num_train_steps + elif self.training_config.session_type == SessionType.EVAL: + self.episode_steps = self.training_config.num_eval_steps + else: + self.episode_steps = self.training_config.num_train_steps super(Primaite, self).__init__() @@ -254,6 +259,7 @@ class Primaite(Env): self.episode_count = 0 self.step_count = 0 self.total_step_count = 0 + self.episode_steps = self.training_config.num_eval_steps def reset(self): """ diff --git a/tests/config/legacy_conversion/new_training_config.yaml b/tests/config/legacy_conversion/new_training_config.yaml index 49e6a00b..5ca80742 100644 --- a/tests/config/legacy_conversion/new_training_config.yaml +++ b/tests/config/legacy_conversion/new_training_config.yaml @@ -20,10 +20,17 @@ agent_identifier: PPO # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 10 -# Number of time_steps per episode -num_steps: 256 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic 
agents) time_delay: 10 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml index d26d7955..bbdce9c1 100644 --- a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml +++ b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml @@ -22,10 +22,17 @@ agent_identifier: A2C # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: diff --git a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml index aae740b6..41b3e588 100644 --- a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml +++ b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: components: diff --git a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml index 4066eace..34758199 100644 --- a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml +++ 
b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: components: diff --git a/tests/config/obs_tests/main_config_without_obs.yaml b/tests/config/obs_tests/main_config_without_obs.yaml index 08452dda..352e765c 100644 --- a/tests/config/obs_tests/main_config_without_obs.yaml +++ b/tests/config/obs_tests/main_config_without_obs.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/one_node_states_on_off_main_config.yaml b/tests/config/one_node_states_on_off_main_config.yaml index 7f1ced01..63fdd1a5 100644 --- a/tests/config/one_node_states_on_off_main_config.yaml +++ b/tests/config/one_node_states_on_off_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: DUMMY # "ACL" # "ANY" node and acl actions action_type: NODE -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for 
training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml index 97d0ddaf..859b2ab3 100644 --- a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml +++ b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/single_action_space_main_config.yaml b/tests/config/single_action_space_main_config.yaml index 067b9a6d..c875757f 100644 --- a/tests/config/single_action_space_main_config.yaml +++ b/tests/config/single_action_space_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session 
+num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/test_random_red_main_config.yaml b/tests/config/test_random_red_main_config.yaml index 800fe808..e0fc40ee 100644 --- a/tests/config/test_random_red_main_config.yaml +++ b/tests/config/test_random_red_main_config.yaml @@ -28,10 +28,17 @@ random_red_agent: True # "ACL" # "ANY" node and acl actions action_type: NODE -# Number of episodes to run per session -num_episodes: 2 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/config/train_episode_step.yaml b/tests/config/train_episode_step.yaml new file mode 100644 index 00000000..550b95fd --- /dev/null +++ b/tests/config/train_episode_step.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. 
+# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. +# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 30 + +# Number of time_steps for training per episode +num_train_steps: 1 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 10 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: EVAL + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/config/train_eval_check_episode_step.yaml b/tests/config/train_eval_check_episode_step.yaml new file mode 100644 index 00000000..f616116e --- /dev/null +++ b/tests/config/train_eval_check_episode_step.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. +# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
+# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 30 + +# Number of time_steps for training per episode +num_train_steps: 1 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 10 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: TRAIN + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/config/training_config_main.yaml b/tests/config/training_config_main.yaml new file mode 100644 index 00000000..3351d66b --- /dev/null +++ b/tests/config/training_config_main.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. +# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
+# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: TRAIN + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/conftest.py b/tests/conftest.py index af76b314..7e06bea7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -163,8 +163,9 @@ def run_generic(env, config_values): """Run against a generic agent.""" # Reset the environment at the start of the episode # env.reset() - for episode in range(0, config_values.num_episodes): - for step in range(0, config_values.num_steps): + print(config_values.num_train_episodes, "how many episodes") + for episode in range(0, config_values.num_train_episodes): + for step in range(0, config_values.num_train_steps): # Send the observation space to the agent to get an action # TEMP - random action for now # action = env.blue_agent_action(obs) diff --git a/tests/test_single_action_space.py b/tests/test_single_action_space.py index 5d55b9c9..bfcffd42 100644 --- a/tests/test_single_action_space.py +++ b/tests/test_single_action_space.py @@ -12,8 +12,8 @@ def run_generic_set_actions(env: Primaite): # Reset the environment at the start of the episode # env.reset() training_config = env.training_config - for episode in range(0, training_config.num_episodes): - for step in range(0, training_config.num_steps): + for episode in range(0, training_config.num_train_episodes): + for step in range(0, training_config.num_train_steps): # Send the observation space to the agent to get an action # TEMP - random action for now # action = env.blue_agent_action(obs) diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py new file mode 100644 
index 00000000..fad30f1b --- /dev/null +++ b/tests/test_train_eval_episode_steps.py @@ -0,0 +1,46 @@ +import pytest + +from primaite import getLogger +from primaite.config.lay_down_config import dos_very_basic_config_path +from tests import TEST_CONFIG_ROOT +from tests.conftest import run_generic + +_LOGGER = getLogger(__name__) + + +@pytest.mark.parametrize( + "temp_primaite_session", + [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]], + indirect=True, +) +def test_eval_steps_differ_from_training(temp_primaite_session): + """Uses PrimaiteSession class to compare number of episodes used for training and evaluation.""" + with temp_primaite_session as train_session: + env = train_session.env + train_session.learn() + + """ + Train_episode_step.yaml main config: + num_train_steps = 1 + num_eval_steps = 10 + + When the YAML file changes from TRAIN to EVAL the episode step changes and uses the other config value. + + The test is showing that 10 steps are running for evaluation and NOT 1 step as EVAL has been selected in the config. + """ + assert env.episode_steps == 10 # 30 + # assert env.actual_episode_count == 10 # should be 10 + + +@pytest.mark.parametrize( + "temp_primaite_session", + [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]], + indirect=True, +) +def test_train_eval_config_option(temp_primaite_session): + """Uses PrimaiteSession class to test number of episodes and steps used for TRAIN and EVAL option.""" + with temp_primaite_session as train_session: + env = train_session.env + run_generic(env, env.training_config) + + print(env.actual_episode_count, env.step_count, env.total_step_count) From 40381833d3bae56ae95be5550e0d6c44992017bc Mon Sep 17 00:00:00 2001 From: Chris McCarthy Date: Fri, 7 Jul 2023 15:50:14 +0100 Subject: [PATCH 03/32] #1566 - Refactored the test_train_eval_episode_steps.py to use TempPrimaiteSession. - Fixed all errors that were caused by fixing the above.
- Some tests still fail, these are for SS to fix. - Dropped the old run_generic stuff from conftest.py --- src/primaite/agents/agent.py | 14 +- src/primaite/agents/rllib.py | 9 +- src/primaite/agents/sb3.py | 6 +- .../training/training_config_main.yaml | 2 +- src/primaite/config/training_config.py | 6 +- src/primaite/environment/primaite_env.py | 10 +- src/primaite/environment/reward.py | 1 - src/primaite/utils/session_output_reader.py | 5 +- ...ne_node_states_on_off_lay_down_config.yaml | 5 - .../one_node_states_on_off_main_config.yaml | 8 +- tests/config/train_episode_step.yaml | 12 +- .../config/train_eval_check_episode_step.yaml | 153 ------------------ tests/conftest.py | 67 +------- tests/test_reward.py | 4 +- tests/test_train_eval_episode_steps.py | 46 +++--- 15 files changed, 69 insertions(+), 279 deletions(-) delete mode 100644 tests/config/train_eval_check_episode_step.yaml diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py index 2cdb242b..883e844b 100644 --- a/src/primaite/agents/agent.py +++ b/src/primaite/agents/agent.py @@ -153,12 +153,11 @@ class AgentSessionABC(ABC): metadata_dict = json.load(file) metadata_dict["end_datetime"] = datetime.now().isoformat() - if not self.is_eval: - metadata_dict["learning"]["total_episodes"] = self._env.episode_count # noqa + metadata_dict["learning"]["total_episodes"] = self._env.actual_episode_count # noqa metadata_dict["learning"]["total_time_steps"] = self._env.total_step_count # noqa else: - metadata_dict["evaluation"]["total_episodes"] = self._env.episode_count # noqa + metadata_dict["evaluation"]["total_episodes"] = self._env.actual_episode_count # noqa metadata_dict["evaluation"]["total_time_steps"] = self._env.total_step_count # noqa filepath = self.session_path / "session_metadata.json" @@ -209,10 +208,11 @@ class AgentSessionABC(ABC): :param kwargs: Any agent-specific key-word args to be passed. 
""" - self._env.set_as_eval() # noqa - self.is_eval = True - self._plot_av_reward_per_episode(learning_session=False) - _LOGGER.info("Finished evaluation") + if self._can_evaluate: + self._plot_av_reward_per_episode(learning_session=False) + self._update_session_metadata_file() + self.is_eval = True + _LOGGER.info("Finished evaluation") @abstractmethod def _get_latest_checkpoint(self): diff --git a/src/primaite/agents/rllib.py b/src/primaite/agents/rllib.py index 28d21e20..7067f6a6 100644 --- a/src/primaite/agents/rllib.py +++ b/src/primaite/agents/rllib.py @@ -85,8 +85,12 @@ class RLlibAgent(AgentSessionABC): metadata_dict = json.load(file) metadata_dict["end_datetime"] = datetime.now().isoformat() - metadata_dict["total_episodes"] = self._current_result["episodes_total"] - metadata_dict["total_time_steps"] = self._current_result["timesteps_total"] + if not self.is_eval: + metadata_dict["learning"]["total_episodes"] = self._current_result["episodes_total"] # noqa + metadata_dict["learning"]["total_time_steps"] = self._current_result["timesteps_total"] # noqa + else: + metadata_dict["evaluation"]["total_episodes"] = self._current_result["episodes_total"] # noqa + metadata_dict["evaluation"]["total_time_steps"] = self._current_result["timesteps_total"] # noqa filepath = self.session_path / "session_metadata.json" _LOGGER.debug(f"Updating Session Metadata file: {filepath}") @@ -150,7 +154,6 @@ class RLlibAgent(AgentSessionABC): super().learn() - def evaluate( self, **kwargs, diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py index 00983140..dc049e91 100644 --- a/src/primaite/agents/sb3.py +++ b/src/primaite/agents/sb3.py @@ -58,7 +58,7 @@ class SB3Agent(AgentSessionABC): PPOMlp, self._env, verbose=self.sb3_output_verbose_level, - n_steps=self._training_config.num_eval_steps, + n_steps=self._training_config.num_train_steps, tensorboard_log=str(self._tensorboard_log_path), seed=self._training_config.seed, ) @@ -93,7 +93,7 @@ class 
SB3Agent(AgentSessionABC): for i in range(episodes): self._agent.learn(total_timesteps=time_steps) self._save_checkpoint() - self._env.reset() + self._env._write_av_reward_per_episode() # noqa self.save() self._env.close() super().learn() @@ -129,7 +129,7 @@ class SB3Agent(AgentSessionABC): if isinstance(action, np.ndarray): action = np.int64(action) obs, rewards, done, info = self._env.step(action) - self._env.reset() + self._env._write_av_reward_per_episode() # noqa self._env.close() super().evaluate() diff --git a/src/primaite/config/_package_data/training/training_config_main.yaml b/src/primaite/config/_package_data/training/training_config_main.yaml index f45f976a..61c45758 100644 --- a/src/primaite/config/_package_data/training/training_config_main.yaml +++ b/src/primaite/config/_package_data/training/training_config_main.yaml @@ -68,7 +68,7 @@ num_train_episodes: 10 num_train_steps: 256 # Number of episodes for evaluation to run per session -num_eval_episodes: 10 +num_eval_episodes: 1 # Number of time_steps for evaluation per episode num_eval_steps: 256 diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py index 2b46e513..5bbe881b 100644 --- a/src/primaite/config/training_config.py +++ b/src/primaite/config/training_config.py @@ -66,7 +66,7 @@ class TrainingConfig: num_train_steps: int = 256 "The number of steps in an episode during an training session" - num_eval_episodes: int = 10 + num_eval_episodes: int = 1 "The number of episodes to train over during an evaluation session" num_eval_steps: int = 256 @@ -242,10 +242,10 @@ class TrainingConfig: tc += f"{self.hard_coded_agent_view}, " tc += f"{self.action_type}, " tc += f"observation_space={self.observation_space}, " - if self.session_type.name == "TRAIN": + if self.session_type is SessionType.TRAIN: tc += f"{self.num_train_episodes} episodes @ " tc += f"{self.num_train_steps} steps" - elif self.session_type.name == "EVAL": + elif self.session_type is SessionType.EVAL: 
tc += f"{self.num_eval_episodes} episodes @ " tc += f"{self.num_eval_steps} steps" else: diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index 18cf8767..ed6eefb2 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -261,6 +261,11 @@ class Primaite(Env): self.total_step_count = 0 self.episode_steps = self.training_config.num_eval_steps + def _write_av_reward_per_episode(self): + if self.actual_episode_count > 0: + csv_data = self.actual_episode_count, self.average_reward + self.episode_av_reward_writer.write(csv_data) + def reset(self): """ AI Gym Reset function. @@ -268,10 +273,7 @@ class Primaite(Env): Returns: Environment observation space (reset) """ - if self.actual_episode_count > 0: - csv_data = self.actual_episode_count, self.average_reward - self.episode_av_reward_writer.write(csv_data) - + self._write_av_reward_per_episode() self.episode_count += 1 # Don't need to reset links, as they are cleared and recalculated every diff --git a/src/primaite/environment/reward.py b/src/primaite/environment/reward.py index e4353cb9..9cbb0078 100644 --- a/src/primaite/environment/reward.py +++ b/src/primaite/environment/reward.py @@ -90,7 +90,6 @@ def calculate_reward_function( f"Penalty of {ier_reward} was NOT applied." ) ) - return reward_value diff --git a/src/primaite/utils/session_output_reader.py b/src/primaite/utils/session_output_reader.py index d04f375e..eb7a7675 100644 --- a/src/primaite/utils/session_output_reader.py +++ b/src/primaite/utils/session_output_reader.py @@ -16,5 +16,6 @@ def av_rewards_dict(av_rewards_csv_file: Union[str, Path]) -> Dict[int, float]: :param av_rewards_csv_file: The average rewards per episode csv file path. :return: The average rewards per episode cdv as a dict. 
""" - d = pl.read_csv(av_rewards_csv_file).to_dict() - return {v: d["Average Reward"][i] for i, v in enumerate(d["Episode"])} + df = pl.read_csv(av_rewards_csv_file).to_dict() + + return {v: df["Average Reward"][i] for i, v in enumerate(df["Episode"])} diff --git a/tests/config/one_node_states_on_off_lay_down_config.yaml b/tests/config/one_node_states_on_off_lay_down_config.yaml index 996cf368..aadbd449 100644 --- a/tests/config/one_node_states_on_off_lay_down_config.yaml +++ b/tests/config/one_node_states_on_off_lay_down_config.yaml @@ -18,11 +18,6 @@ - name: ftp port: '21' state: GOOD -- item_type: POSITION - positions: - - node: '1' - x_pos: 309 - y_pos: 78 - item_type: RED_POL id: '1' start_step: 1 diff --git a/tests/config/one_node_states_on_off_main_config.yaml b/tests/config/one_node_states_on_off_main_config.yaml index 63fdd1a5..dd425a8c 100644 --- a/tests/config/one_node_states_on_off_main_config.yaml +++ b/tests/config/one_node_states_on_off_main_config.yaml @@ -22,17 +22,13 @@ agent_identifier: DUMMY # "ACL" # "ANY" node and acl actions action_type: NODE -# Number of episodes for training to run per session -num_train_episodes: 10 -# Number of time_steps for training per episode -num_train_steps: 256 # Number of episodes for evaluation to run per session -num_eval_episodes: 10 +num_eval_episodes: 1 # Number of time_steps for evaluation per episode -num_eval_steps: 256 +num_eval_steps: 15 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/config/train_episode_step.yaml b/tests/config/train_episode_step.yaml index 550b95fd..f112b741 100644 --- a/tests/config/train_episode_step.yaml +++ b/tests/config/train_episode_step.yaml @@ -52,20 +52,20 @@ observation_space: # Number of episodes for training to run per session -num_train_episodes: 30 +num_train_episodes: 3 # Number of time_steps for training per episode -num_train_steps: 1 +num_train_steps: 25 # Number of episodes for evaluation to run per session -num_eval_episodes: 10 
+num_eval_episodes: 1 # Number of time_steps for evaluation per episode -num_eval_steps: 10 +num_eval_steps: 17 # Sets how often the agent will save a checkpoint (every n time episodes). # Set to 0 if no checkpoints are required. Default is 10 -checkpoint_every_n_episodes: 10 +checkpoint_every_n_episodes: 0 # Time delay (milliseconds) between steps for CUSTOM agents. time_delay: 5 @@ -74,7 +74,7 @@ time_delay: 5 # "TRAIN" (Trains an agent) # "EVAL" (Evaluates an agent) # "TRAIN_EVAL" (Trains then evaluates an agent) -session_type: EVAL +session_type: TRAIN_EVAL # Environment config values # The high value for the observation space diff --git a/tests/config/train_eval_check_episode_step.yaml b/tests/config/train_eval_check_episode_step.yaml deleted file mode 100644 index f616116e..00000000 --- a/tests/config/train_eval_check_episode_step.yaml +++ /dev/null @@ -1,153 +0,0 @@ -# Training Config File - -# Sets which agent algorithm framework will be used. -# Options are: -# "SB3" (Stable Baselines3) -# "RLLIB" (Ray RLlib) -# "CUSTOM" (Custom Agent) -agent_framework: SB3 - -# Sets which deep learning framework will be used (by RLlib ONLY). -# Default is TF (Tensorflow). -# Options are: -# "TF" (Tensorflow) -# TF2 (Tensorflow 2.X) -# TORCH (PyTorch) -deep_learning_framework: TF2 - -# Sets which Agent class will be used. -# Options are: -# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) -# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) -# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) -# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) -# "RANDOM" (primaite.agents.simple.RandomAgent) -# "DUMMY" (primaite.agents.simple.DummyAgent) -agent_identifier: PPO - -# Sets whether Red Agent POL and IER is randomised. -# Options are: -# True -# False -random_red_agent: False - -# Sets what view of the environment the deterministic hardcoded agent has. 
The default is BASIC. -# Options are: -# "BASIC" (The current observation space only) -# "FULL" (Full environment view with actions taken and reward feedback) -hard_coded_agent_view: FULL - -# Sets How the Action Space is defined: -# "NODE" -# "ACL" -# "ANY" node and acl actions -action_type: NODE -# observation space -observation_space: - # flatten: true - components: - - name: NODE_LINK_TABLE - # - name: NODE_STATUSES - # - name: LINK_TRAFFIC_LEVELS - - -# Number of episodes for training to run per session -num_train_episodes: 30 - -# Number of time_steps for training per episode -num_train_steps: 1 - -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 10 - -# Sets how often the agent will save a checkpoint (every n time episodes). -# Set to 0 if no checkpoints are required. Default is 10 -checkpoint_every_n_episodes: 10 - -# Time delay (milliseconds) between steps for CUSTOM agents. -time_delay: 5 - -# Type of session to be run. 
Options are: -# "TRAIN" (Trains an agent) -# "EVAL" (Evaluates an agent) -# "TRAIN_EVAL" (Trains then evaluates an agent) -session_type: TRAIN - -# Environment config values -# The high value for the observation space -observation_space_high_value: 1000000000 - -# The Stable Baselines3 learn/eval output verbosity level: -# Options are: -# "NONE" (No Output) -# "INFO" (Info Messages (such as devices and wrappers used)) -# "DEBUG" (All Messages) -sb3_output_verbose_level: NONE - -# Reward values -# Generic -all_ok: 0 -# Node Hardware State -off_should_be_on: -10 -off_should_be_resetting: -5 -on_should_be_off: -2 -on_should_be_resetting: -5 -resetting_should_be_on: -5 -resetting_should_be_off: -2 -resetting: -3 -# Node Software or Service State -good_should_be_patching: 2 -good_should_be_compromised: 5 -good_should_be_overwhelmed: 5 -patching_should_be_good: -5 -patching_should_be_compromised: 2 -patching_should_be_overwhelmed: 2 -patching: -3 -compromised_should_be_good: -20 -compromised_should_be_patching: -20 -compromised_should_be_overwhelmed: -20 -compromised: -20 -overwhelmed_should_be_good: -20 -overwhelmed_should_be_patching: -20 -overwhelmed_should_be_compromised: -20 -overwhelmed: -20 -# Node File System State -good_should_be_repairing: 2 -good_should_be_restoring: 2 -good_should_be_corrupt: 5 -good_should_be_destroyed: 10 -repairing_should_be_good: -5 -repairing_should_be_restoring: 2 -repairing_should_be_corrupt: 2 -repairing_should_be_destroyed: 0 -repairing: -3 -restoring_should_be_good: -10 -restoring_should_be_repairing: -2 -restoring_should_be_corrupt: 1 -restoring_should_be_destroyed: 2 -restoring: -6 -corrupt_should_be_good: -10 -corrupt_should_be_repairing: -10 -corrupt_should_be_restoring: -10 -corrupt_should_be_destroyed: 2 -corrupt: -10 -destroyed_should_be_good: -20 -destroyed_should_be_repairing: -20 -destroyed_should_be_restoring: -20 -destroyed_should_be_corrupt: -20 -destroyed: -20 -scanning: -2 -# IER status -red_ier_running: -5 
-green_ier_blocked: -10 - -# Patching / Reset durations -os_patching_duration: 5 # The time taken to patch the OS -node_reset_duration: 5 # The time taken to reset a node (hardware) -service_patching_duration: 5 # The time taken to patch a service -file_system_repairing_limit: 5 # The time take to repair the file system -file_system_restoring_limit: 5 # The time take to restore the file system -file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/conftest.py b/tests/conftest.py index 2d78f61d..aaf4dbce 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,17 +1,16 @@ # Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence. import datetime +import json import shutil import tempfile -import time from datetime import datetime from pathlib import Path -from typing import Dict, Union +from typing import Any, Dict, Union from unittest.mock import patch import pytest from primaite import getLogger -from primaite.common.enums import AgentIdentifier from primaite.environment.primaite_env import Primaite from primaite.primaite_session import PrimaiteSession from primaite.utils.session_output_reader import av_rewards_dict @@ -48,6 +47,11 @@ class TempPrimaiteSession(PrimaiteSession): csv_file = f"average_reward_per_episode_{self.timestamp_str}.csv" return av_rewards_dict(self.evaluation_path / csv_file) + def metadata_file_as_dict(self) -> Dict[str, Any]: + """Read the session_metadata.json file and return as a dict.""" + with open(self.session_path / "session_metadata.json", "r") as file: + return json.load(file) + @property def env(self) -> Primaite: """Direct access to the env for ease of testing.""" @@ -58,6 +62,7 @@ class TempPrimaiteSession(PrimaiteSession): def __exit__(self, type, value, tb): shutil.rmtree(self.session_path) + shutil.rmtree(self.session_path.parent) _LOGGER.debug(f"Deleted temp session directory: {self.session_path}") @@ -129,59 +134,3 @@ def temp_session_path() -> Path: 
session_path.mkdir(exist_ok=True, parents=True) return session_path - - -def _get_primaite_env_from_config( - training_config_path: Union[str, Path], - lay_down_config_path: Union[str, Path], - temp_session_path, -): - """Takes a config path and returns the created instance of Primaite.""" - session_timestamp: datetime = datetime.now() - session_path = temp_session_path(session_timestamp) - - timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S") - env = Primaite( - training_config_path=training_config_path, - lay_down_config_path=lay_down_config_path, - session_path=session_path, - timestamp_str=timestamp_str, - ) - config_values = env.training_config - config_values.num_steps = env.episode_steps - - # TOOD: This needs t be refactored to happen outside. Should be part of - # a main Session class. - if env.training_config.agent_identifier is AgentIdentifier.RANDOM: - run_generic(env, config_values) - - return env - - -def run_generic(env, config_values): - """Run against a generic agent.""" - # Reset the environment at the start of the episode - # env.reset() - print(config_values.num_train_episodes, "how many episodes") - for episode in range(0, config_values.num_train_episodes): - for step in range(0, config_values.num_train_steps): - # Send the observation space to the agent to get an action - # TEMP - random action for now - # action = env.blue_agent_action(obs) - # action = env.action_space.sample() - action = 0 - - # Run the simulation step on the live environment - obs, reward, done, info = env.step(action) - - # Break if done is True - if done: - break - - # Introduce a delay between steps - time.sleep(config_values.time_delay / 1000) - - # Reset the environment at the end of the episode - # env.reset() - - # env.close() diff --git a/tests/test_reward.py b/tests/test_reward.py index 81437860..d1b56671 100644 --- a/tests/test_reward.py +++ b/tests/test_reward.py @@ -1,7 +1,10 @@ import pytest +from primaite import getLogger from tests import 
TEST_CONFIG_ROOT +_LOGGER = getLogger(__name__) + @pytest.mark.parametrize( "temp_primaite_session", @@ -44,7 +47,6 @@ def test_rewards_are_being_penalised_at_each_step_function( Average Reward: -8 (-120 / 15) """ with temp_primaite_session as session: - session.evaluate() session.close() ev_rewards = session.eval_av_reward_per_episode_csv() assert ev_rewards[1] == -8.0 diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py index fad30f1b..daa93055 100644 --- a/tests/test_train_eval_episode_steps.py +++ b/tests/test_train_eval_episode_steps.py @@ -3,7 +3,6 @@ import pytest from primaite import getLogger from primaite.config.lay_down_config import dos_very_basic_config_path from tests import TEST_CONFIG_ROOT -from tests.conftest import run_generic _LOGGER = getLogger(__name__) @@ -14,33 +13,30 @@ _LOGGER = getLogger(__name__) indirect=True, ) def test_eval_steps_differ_from_training(temp_primaite_session): - """Uses PrimaiteSession class to compare number of episodes used for training and evaluation.""" - with temp_primaite_session as train_session: - env = train_session.env - train_session.learn() + """Uses PrimaiteSession class to compare number of episodes used for training and evaluation. - """ Train_episode_step.yaml main config: - num_train_steps = 1 - num_eval_steps = 10 - - When the YAML file changes from TRAIN to EVAL the episode step changes and uses the other config value. - - The test is showing that 10 steps are running for evaluation and NOT 1 step as EVAL has been selected in the config. 
+ num_train_steps = 25 + num_train_episodes = 3 + num_eval_steps = 17 + num_eval_episodes = 1 """ - assert env.episode_steps == 10 # 30 - # assert env.actual_episode_count == 10 # should be 10 + expected_learning_metadata = {"total_episodes": 3, "total_time_steps": 75} + expected_evaluation_metadata = {"total_episodes": 1, "total_time_steps": 17} + with temp_primaite_session as session: + # Run learning and check episode and step counts + session.learn() + assert session.env.actual_episode_count == expected_learning_metadata["total_episodes"] + assert session.env.total_step_count == expected_learning_metadata["total_time_steps"] -@pytest.mark.parametrize( - "temp_primaite_session", - [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]], - indirect=True, -) -def test_train_eval_config_option(temp_primaite_session): - """Uses PrimaiteSession class to test number of episodes and steps used for TRAIN and EVAL option.""" - with temp_primaite_session as train_session: - env = train_session.env - run_generic(env, env.training_config) + # Run evaluation and check episode and step counts + session.evaluate() + assert session.env.actual_episode_count == expected_evaluation_metadata["total_episodes"] + assert session.env.total_step_count == expected_evaluation_metadata["total_time_steps"] - print(env.actual_episode_count, env.step_count, env.total_step_count) + # Load the session_metadata.json file and check that the both the + # learning and evaluation match what is expected above + metadata = session.metadata_file_as_dict() + assert metadata["learning"] == expected_learning_metadata + assert metadata["evaluation"] == expected_evaluation_metadata From 41fab6562e8be6312256b597b7519815ae57938c Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Fri, 7 Jul 2023 16:26:12 +0100 Subject: [PATCH 04/32] 1566 - updated configs to correct values of step count and number of episodes --- .../obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml | 11 ++--------- 
.../config/obs_tests/main_config_NODE_LINK_TABLE.yaml | 10 ++-------- tests/config/obs_tests/main_config_NODE_STATUSES.yaml | 9 ++------- tests/config/obs_tests/main_config_without_obs.yaml | 10 ++-------- 4 files changed, 8 insertions(+), 32 deletions(-) diff --git a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml index bbdce9c1..2ac8f59a 100644 --- a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml +++ b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml @@ -23,17 +23,10 @@ agent_identifier: A2C # "ANY" node and acl actions action_type: ANY # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 1 # Number of time_steps for training per episode -num_train_steps: 256 - -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 - +num_train_steps: 5 observation_space: components: diff --git a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml index 41b3e588..a9986d5b 100644 --- a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml +++ b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml @@ -23,16 +23,10 @@ agent_identifier: RANDOM # "ANY" node and acl actions action_type: ANY # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 1 # Number of time_steps for training per episode -num_train_steps: 256 - -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 +num_train_steps: 5 observation_space: components: diff --git a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml index 34758199..a129712c 100644 --- a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml +++ 
b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml @@ -23,16 +23,11 @@ agent_identifier: RANDOM # "ANY" node and acl actions action_type: ANY # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 1 # Number of time_steps for training per episode -num_train_steps: 256 +num_train_steps: 5 -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 observation_space: components: diff --git a/tests/config/obs_tests/main_config_without_obs.yaml b/tests/config/obs_tests/main_config_without_obs.yaml index 352e765c..03d11b82 100644 --- a/tests/config/obs_tests/main_config_without_obs.yaml +++ b/tests/config/obs_tests/main_config_without_obs.yaml @@ -23,16 +23,10 @@ agent_identifier: RANDOM # "ANY" node and acl actions action_type: ANY # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 1 # Number of time_steps for training per episode -num_train_steps: 256 - -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 +num_train_steps: 5 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) From 17894376c6ae17229ac57a05c8ac381f7995d639 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Sun, 9 Jul 2023 18:07:21 +0100 Subject: [PATCH 05/32] Removed comment --- src/primaite/agents/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/primaite/agents/utils.py b/src/primaite/agents/utils.py index b5a3c673..acc70cc4 100644 --- a/src/primaite/agents/utils.py +++ b/src/primaite/agents/utils.py @@ -165,7 +165,6 @@ def transform_change_obs_readable(obs): os_states = [SoftwareState(i).name for i in obs[:, 2]] new_obs = [ids, operating_states, os_states] - # changed range(3,...) to range(4,...) 
because we added file system which was new since ADSP for service in range(4, obs.shape[1]): # Links bit/s don't have a service state service_states = [SoftwareState(i).name if i <= 4 else i for i in obs[:, service]] From 01455321037dbec72d8ad9c6a7cb3d695fe19052 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Sun, 9 Jul 2023 20:23:53 +0100 Subject: [PATCH 06/32] Update docs --- docs/source/about.rst | 86 +++++++++++++++----------------- docs/source/custom_agent.rst | 76 ++++++++++++++++++++++++++-- docs/source/primaite_session.rst | 2 +- 3 files changed, 111 insertions(+), 53 deletions(-) diff --git a/docs/source/about.rst b/docs/source/about.rst index 1f4669fe..a4a92b92 100644 --- a/docs/source/about.rst +++ b/docs/source/about.rst @@ -10,11 +10,11 @@ PrimAITE provides the following features: * A flexible network / system laydown based on the Python networkx framework * Nodes and links (edges) host Python classes in order to present attributes and methods (and hence, a more representative model of a platform / system) -* A ‘green agent’ Information Exchange Requirement (IER) function allows the representation of traffic (protocols and loading) on any / all links. Application of IERs is based on the status of node operating systems and services -* A ‘green agent’ node Pattern-of-Life (PoL) function allows the representation of core behaviours on nodes (e.g. Hardware state, Software State, Service state, File System state) +* A 'green agent' Information Exchange Requirement (IER) function allows the representation of traffic (protocols and loading) on any / all links. Application of IERs is based on the status of node operating systems and services +* A 'green agent' node Pattern-of-Life (PoL) function allows the representation of core behaviours on nodes (e.g. 
changing the Hardware state, Software State, Service state, or File System state) * An Access Control List (ACL) function, mimicking the behaviour of a network firewall, is applied across the model, following standard ACL rule format (e.g. DENY/ALLOW, source IP, destination IP, protocol and port). Application of IERs adheres to any ACL restrictions * Presents an OpenAI Gym interface to the environment, allowing integration with any OpenAI Gym compliant defensive agents -* Red agent activity based on ‘red’ IERs and ‘red’ PoL +* Red agent activity based on 'red' IERs and 'red' PoL * Defined reward function for use with RL agents (based on nodes status, and green / red IER success) * Fully configurable (network / system laydown, IERs, node PoL, ACL, episode step period, episode max steps) and repeatable to suit the training requirements of agents. Therefore, not bound to a representation of any particular platform, system or technology * Full capture of discrete metrics relating to agent training (full system state, agent actions taken, average reward) @@ -201,7 +201,7 @@ An example observation space is provided below: * - - ID - Hardware State - - SoftwareState + - Software State - File System State - Service / Protocol A - Service / Protocol B @@ -250,48 +250,35 @@ An example observation space is provided below: For the nodes, the following values are represented: - * ID - * Hardware State: +.. 
code-block:: - * 1 = ON - * 2 = OFF - * 3 = RESETTING - * 4 = SHUTTING_DOWN - * 5 = BOOTING - - * SoftwareState: - - * 1 = GOOD - * 2 = PATCHING - * 3 = COMPROMISED - - * Service State: - - * 1 = GOOD - * 2 = PATCHING - * 3 = COMPROMISED - * 4 = OVERWHELMED - - * File System State: - - * 1 = GOOD - * 2 = CORRUPT - * 3 = DESTROYED - * 4 = REPAIRING - * 5 = RESTORING + [ + ID + Hardware State (1=ON, 2=OFF, 3=RESETTING, 4=SHUTTING_DOWN, 5=BOOTING) + Operating System State (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED) + File System State (0=none, 1=GOOD, 2=CORRUPT, 3=DESTROYED, 4=REPAIRING, 5=RESTORING) + Service1/Protocol1 state (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED) + Service2/Protocol2 state (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED) + ] (Note that each service available in the network is provided as a column, although not all nodes may utilise all services) For the links, the following statuses are represented: - * ID - * Hardware State = N/A - * SoftwareState = N/A - * Protocol = loading in bits/s +.. code-block:: + + [ + ID + Hardware State (0=not applicable) + Operating System State (0=not applicable) + File System State (0=not applicable) + Service1/Protocol1 state (Traffic load from this protocol on this link) + Service2/Protocol2 state (Traffic load from this protocol on this link) + ] NodeStatus component ---------------------- -This is a MultiDiscrete observation space that can be though of as a one-dimensional vector of discrete states, represented by integers. +This is a MultiDiscrete observation space that can be though of as a one-dimensional vector of discrete states. The example above would have the following structure: .. code-block:: @@ -307,9 +294,9 @@ Each ``node_info`` contains the following: .. 
code-block:: [ - hardware_state (0=none, 1=ON, 2=OFF, 3=RESETTING, 4=SHUTTING_DOWN, 5=BOOTING) + hardware_state (0=none, 1=ON, 2=OFF, 3=RESETTING, 4=SHUTTING_DOWN, 5=BOOTING) software_state (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED) - file_system_state (0=none, 1=GOOD, 2=CORRUPT, 3=DESTROYED, 4=REPAIRING, 5=RESTORING) + file_system_state (0=none, 1=GOOD, 2=CORRUPT, 3=DESTROYED, 4=REPAIRING, 5=RESTORING) service1_state (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED) service2_state (0=none, 1=GOOD, 2=PATCHING, 3=COMPROMISED) ] @@ -320,10 +307,18 @@ In a network with three nodes and two services, the full observation space would gym.spaces.MultiDiscrete([4,5,6,4,4,4,5,6,4,4,4,5,6,4,4]) +.. note:: + NodeStatus observation component provides information only about nodes. Links are not considered. + LinkTrafficLevels ----------------- This component is a MultiDiscrete space showing the traffic flow levels on the links in the network, after applying a threshold to convert it from a continuous to a discrete value. -The number of bins can be customised with 5 being the default. It has the following strucutre: +There are two configurable parameters: +* ``quantisation_levels`` determines how many discrete bins to use for converting the continuous traffic value to discrete (default is 5). +* ``combine_service_traffic`` determines whether to separately output traffic use for each network protocol or whether to combine them into an overall value for the link. (default is ``True``) + +For example, with default parameters and a network with three links, the structure of this component would be: + .. code-block:: [ @@ -337,16 +332,13 @@ Each ``link_status`` is a number from 0-4 representing the network load in relat .. 
code-block:: 0 = No traffic (0%) - 1 = low traffic (<33%) - 2 = medium traffic (<66%) - 3 = high traffic (<100%) + 1 = low traffic (1%-33%) + 2 = medium traffic (33%-66%) + 3 = high traffic (66%-99%) 4 = max traffic/ overwhelmed (100%) -If the network has three links, the full observation space would have 3 elements. It can be written with ``gym`` notation to indicate the number of discrete options for each of the elements of the observation space. For example: +Using ``gym`` notation, the shape of the obs space is: ``gym.spaces.MultiDiscrete([5,5,5])``. -.. code-block:: - - gym.spaces.MultiDiscrete([5,5,5]) Action Spaces ************** diff --git a/docs/source/custom_agent.rst b/docs/source/custom_agent.rst index ed1d35c7..53594a8f 100644 --- a/docs/source/custom_agent.rst +++ b/docs/source/custom_agent.rst @@ -4,12 +4,78 @@ **Integrating a user defined blue agent** -Integrating a blue agent with PrimAITE requires some modification of the code within the main.py file. The main.py file -consists of a number of functions, each of which will invoke training for a particular agent. These are: +PrimAITE has integration with Ray RLLib and StableBaselines3 agents. All agents interface with PrimAITE through an :py:class:`primaite.agents.agent.AgentSessionABC` which provides Input/Output of agent savefiles, as well as capturing and plotting performance metrics during training. If you wish to integrate a custom blue agent, it is recommended to create a subclass of the :py:class:`primaite.agents.agent.AgentSessionABC` and implement the ``__init__()``, ``_setup()``, ``_save_checkpoint()``, ``learn()``, ``evaluate()``, ``_get_latest_checkpoint``, ``load()``, ``save()``, and ``export()`` methods. You will also need to modify :py:class:`primaite.primaite_session.PrimaiteSession` class to capture your new agent identifier. + +Below is a barebones example of a custom agent implementation: + +.. 
code:: python + + from primaite.agents.agent import AgentSessionABC + from primaite.common.enums import AgentFramework, AgentIdentifier + + class CustomAgent(AgentSessionABC): + def __init__(self, training_config_path, lay_down_config_path): + super().__init__(training_config_path, lay_down_config_path) + assert self._training_config.agent_framework == AgentFramework.CUSTOM + assert self._training_config.agent_identifier == AgentIdentifier.MY_AGENT + self._setup() + + def _setup(self): + super()._setup() + self._env = Primaite( + training_config_path=self._training_config_path, + lay_down_config_path=self._lay_down_config_path, + session_path=self.session_path, + timestamp_str=self.timestamp_str, + ) + self._agent = ... # your code to setup agent + + def _save_checkpoint(self): + checkpoint_num = self._training_config.checkpoint_every_n_episodes + episode_count = self._env.episode_count + save_checkpoint = False + if checkpoint_num: + save_checkpoint = episode_count % checkpoint_num == 0 + # saves checkpoint if the episode count is not 0 and save_checkpoint flag was set to true + if episode_count and save_checkpoint: + ... + # your code to save checkpoint goes here. + # The path should start with self.checkpoints_path and include the episode number. + + def learn(self): + ... + # call your agent's learning function here. + + super().learn() # this will finalise learning and output session metadata + self.save() + + def evaluate(self): + ... + # call your agent's evaluation function here. + + self._env.close() + super().evaluate() + + def _get_latest_checkpoint(self): + ... + # Load an agent from file. + + @classmethod + def load(cls, path): + ... + # + + def save(self): + ... + # Call your agent's function that saves it to a file + + def export(self): + ... + # Call your agent's function that exports it to a transportable file format. 
+ + + -* Generic (run_generic) -* Stable Baselines 3 PPO (:func:`~primaite.main.run_stable_baselines3_ppo) -* Stable Baselines 3 A2C (:func:`~primaite.main.run_stable_baselines3_a2c) The selection of which agent type to use is made via the training config file. In order to train a user generated agent, the run_generic function should be selected, and should be modified (typically) to be: diff --git a/docs/source/primaite_session.rst b/docs/source/primaite_session.rst index a59b2361..1b48494a 100644 --- a/docs/source/primaite_session.rst +++ b/docs/source/primaite_session.rst @@ -78,9 +78,9 @@ PrimAITE automatically creates two sets of results from each session: * Timestamp * Episode number * Step number - * Initial observation space (what the blue agent observed when it decided its action) * Reward value * Action taken (as presented by the blue agent on this step). Individual elements of the action space are presented in the format AS_X + * Initial observation space (what the blue agent observed when it decided its action) **Diagrams** From 43a4f93626a583cbd6611f82cd4114b4c2db3d3a Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Mon, 10 Jul 2023 11:19:47 +0100 Subject: [PATCH 07/32] Changed order of text in custom agent docs --- docs/source/custom_agent.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/custom_agent.rst b/docs/source/custom_agent.rst index 53594a8f..74b6a607 100644 --- a/docs/source/custom_agent.rst +++ b/docs/source/custom_agent.rst @@ -4,7 +4,7 @@ **Integrating a user defined blue agent** -PrimAITE has integration with Ray RLLib and StableBaselines3 agents. All agents interface with PrimAITE through an :py:class:`primaite.agents.agent.AgentSessionABC` which provides Input/Output of agent savefiles, as well as capturing and plotting performance metrics during training. 
If you wish to integrate a custom blue agent, it is recommended to create a subclass of the :py:class:`primaite.agents.agent.AgentSessionABC` and implement the ``__init__()``, ``_setup()``, ``_save_checkpoint()``, ``learn()``, ``evaluate()``, ``_get_latest_checkpoint``, ``load()``, ``save()``, and ``export()`` methods. You will also need to modify :py:class:`primaite.primaite_session.PrimaiteSession` class to capture your new agent identifier. +PrimAITE has integration with Ray RLLib and StableBaselines3 agents. All agents interface with PrimAITE through an :py:class:`primaite.agents.agent.AgentSessionABC` which provides Input/Output of agent savefiles, as well as capturing and plotting performance metrics during training. If you wish to integrate a custom blue agent, it is recommended to create a subclass of the :py:class:`primaite.agents.agent.AgentSessionABC` and implement the ``__init__()``, ``_setup()``, ``_save_checkpoint()``, ``learn()``, ``evaluate()``, ``_get_latest_checkpoint``, ``load()``, ``save()``, and ``export()`` methods. Below is a barebones example of a custom agent implementation: @@ -74,6 +74,9 @@ Below is a barebones example of a custom agent implementation: # Call your agent's function that exports it to a transportable file format. +You will also need to modify :py:class:`primaite.primaite_session.PrimaiteSession` class to capture your new agent identifier. 
+ + From 921dc934c266ed9379e97702bd9019073aa15fd5 Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Mon, 10 Jul 2023 11:25:26 +0100 Subject: [PATCH 08/32] 1566 - added correct num_train_episodes etc values to configs, fixed test_reward.py --- src/primaite/agents/agent.py | 1 + tests/config/ppo_not_seeded_training_config.yaml | 10 ++++++++-- tests/config/ppo_seeded_training_config.yaml | 10 ++++++++-- ...le_action_space_fixed_blue_actions_main_config.yaml | 9 ++------- tests/config/single_action_space_lay_down_config.yaml | 8 -------- tests/config/test_random_red_main_config.yaml | 8 ++++---- tests/test_reward.py | 2 +- 7 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py index 883e844b..95a00f49 100644 --- a/src/primaite/agents/agent.py +++ b/src/primaite/agents/agent.py @@ -377,6 +377,7 @@ class HardCodedAgentSessionABC(AgentSessionABC): time.sleep(self._training_config.time_delay / 1000) obs = self._env.reset() self._env.close() + super().evaluate() @classmethod def load(cls): diff --git a/tests/config/ppo_not_seeded_training_config.yaml b/tests/config/ppo_not_seeded_training_config.yaml index 23cff44e..14b3f087 100644 --- a/tests/config/ppo_not_seeded_training_config.yaml +++ b/tests/config/ppo_not_seeded_training_config.yaml @@ -60,10 +60,16 @@ observation_space: # - name: NODE_STATUSES # - name: LINK_TRAFFIC_LEVELS # Number of episodes to run per session -num_episodes: 10 +num_train_episodes: 10 # Number of time_steps per episode -num_steps: 256 +num_train_steps: 256 + +# Number of episodes to run per session +num_eval_episodes: 10 + +# Number of time_steps per episode +num_eval_steps: 256 # Sets how often the agent will save a checkpoint (every n time episodes). # Set to 0 if no checkpoints are required. 
Default is 10 diff --git a/tests/config/ppo_seeded_training_config.yaml b/tests/config/ppo_seeded_training_config.yaml index 181331d9..a176c793 100644 --- a/tests/config/ppo_seeded_training_config.yaml +++ b/tests/config/ppo_seeded_training_config.yaml @@ -60,10 +60,16 @@ observation_space: # - name: NODE_STATUSES # - name: LINK_TRAFFIC_LEVELS # Number of episodes to run per session -num_episodes: 10 +num_train_episodes: 10 # Number of time_steps per episode -num_steps: 256 +num_train_steps: 256 + +# Number of episodes to run per session +num_eval_episodes: 1 + +# Number of time_steps per episode +num_eval_steps: 256 # Sets how often the agent will save a checkpoint (every n time episodes). # Set to 0 if no checkpoints are required. Default is 10 diff --git a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml index 859b2ab3..0f378634 100644 --- a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml +++ b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml @@ -23,16 +23,11 @@ agent_identifier: RANDOM # "ANY" node and acl actions action_type: ANY # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 1 # Number of time_steps for training per episode -num_train_steps: 256 +num_train_steps: 15 -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/single_action_space_lay_down_config.yaml b/tests/config/single_action_space_lay_down_config.yaml index c80c0bab..9d05b84a 100644 --- a/tests/config/single_action_space_lay_down_config.yaml +++ b/tests/config/single_action_space_lay_down_config.yaml @@ -32,14 +32,6 @@ - name: ftp port: '21' state: COMPROMISED -- item_type: 
POSITION - positions: - - node: '1' - x_pos: 309 - y_pos: 78 - - node: '2' - x_pos: 200 - y_pos: 78 - item_type: RED_IER id: '3' start_step: 2 diff --git a/tests/config/test_random_red_main_config.yaml b/tests/config/test_random_red_main_config.yaml index e0fc40ee..e2b24b41 100644 --- a/tests/config/test_random_red_main_config.yaml +++ b/tests/config/test_random_red_main_config.yaml @@ -29,16 +29,16 @@ random_red_agent: True # "ANY" node and acl actions action_type: NODE # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 2 # Number of time_steps for training per episode -num_train_steps: 256 +num_train_steps: 15 # Number of episodes for evaluation to run per session -num_eval_episodes: 10 +num_eval_episodes: 2 # Number of time_steps for evaluation per episode -num_eval_steps: 256 +num_eval_steps: 15 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/test_reward.py b/tests/test_reward.py index d1b56671..bb6eb1b0 100644 --- a/tests/test_reward.py +++ b/tests/test_reward.py @@ -47,6 +47,6 @@ def test_rewards_are_being_penalised_at_each_step_function( Average Reward: -8 (-120 / 15) """ with temp_primaite_session as session: - session.close() + session.evaluate() ev_rewards = session.eval_av_reward_per_episode_csv() assert ev_rewards[1] == -8.0 From ca737e080fe3466c56d4c876088c0ee55be5af27 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Mon, 10 Jul 2023 10:25:26 +0000 Subject: [PATCH 09/32] Changed build pipeline experimentally. 
--- .azure/azure-ci-build-pipeline.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index 902eb38d..fe50fb32 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -11,21 +11,27 @@ strategy: UbuntuPython38: python.version: '3.8' imageName: 'ubuntu-latest' + condition: eq(variables['Build.Reason'], 'PullRequest') UbuntuPython310: python.version: '3.10' imageName: 'ubuntu-latest' WindowsPython38: python.version: '3.8' imageName: 'windows-latest' + condition: eq(variables['Build.Reason'], 'PullRequest') WindowsPython310: python.version: '3.10' imageName: 'windows-latest' + condition: eq(variables['Build.Reason'], 'PullRequest') MacOSPython38: python.version: '3.8' imageName: 'macOS-latest' + condition: eq(variables['Build.Reason'], 'PullRequest') MacOSPython310: python.version: '3.10' imageName: 'macOS-latest' + condition: eq(variables['Build.Reason'], 'PullRequest') + # pretty sure this does not support a 'condition' parameter but worth a try. Otherwise a more complicated solution might be warranted. 
pool: vmImage: $(imageName) From 563ff72fd646d66ae737977eafde381faf6e8f58 Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Mon, 10 Jul 2023 13:24:34 +0100 Subject: [PATCH 10/32] 1566 - fixed the test_training_config.py test file by removing num_steps from init --- src/primaite/config/training_config.py | 8 ++++---- tests/config/legacy_conversion/new_training_config.yaml | 5 ----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py index 5bbe881b..785d9757 100644 --- a/src/primaite/config/training_config.py +++ b/src/primaite/config/training_config.py @@ -300,7 +300,7 @@ def convert_legacy_training_config_dict( agent_framework: AgentFramework = AgentFramework.SB3, agent_identifier: AgentIdentifier = AgentIdentifier.PPO, action_type: ActionType = ActionType.ANY, - num_steps: int = 256, + num_train_steps: int = 256, ) -> Dict[str, Any]: """ Convert a legacy training config dict to the new format. @@ -312,15 +312,15 @@ def convert_legacy_training_config_dict( training configs don't have agent_identifier values. :param action_type: The action space type to set as legacy training configs don't have action_type values. - :param num_steps: The number of steps to set as legacy training configs - don't have num_steps values. + :param num_train_steps: The number of steps to set as legacy training configs + don't have num_train_steps values. :return: The converted training config dict. 
""" config_dict = { "agent_framework": agent_framework.name, "agent_identifier": agent_identifier.name, "action_type": action_type.name, - "num_steps": num_steps, + "num_train_steps": num_train_steps, "sb3_output_verbose_level": SB3OutputVerboseLevel.INFO.name, } session_type_map = {"TRAINING": "TRAIN", "EVALUATION": "EVAL"} diff --git a/tests/config/legacy_conversion/new_training_config.yaml b/tests/config/legacy_conversion/new_training_config.yaml index 5ca80742..c57741f7 100644 --- a/tests/config/legacy_conversion/new_training_config.yaml +++ b/tests/config/legacy_conversion/new_training_config.yaml @@ -26,11 +26,6 @@ num_train_episodes: 10 # Number of time_steps for training per episode num_train_steps: 256 -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 10 # Type of session to be run (TRAINING or EVALUATION) From 30bcdba429aee12c1c51679a539494a17d96ad25 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Mon, 10 Jul 2023 14:56:06 +0100 Subject: [PATCH 11/32] Finished writing custom agent example. --- docs/source/custom_agent.rst | 106 ++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 51 deletions(-) diff --git a/docs/source/custom_agent.rst b/docs/source/custom_agent.rst index 74b6a607..45d1c5a4 100644 --- a/docs/source/custom_agent.rst +++ b/docs/source/custom_agent.rst @@ -2,14 +2,21 @@ ============= -**Integrating a user defined blue agent** +Integrating a user defined blue agent +************************************* -PrimAITE has integration with Ray RLLib and StableBaselines3 agents. All agents interface with PrimAITE through an :py:class:`primaite.agents.agent.AgentSessionABC` which provides Input/Output of agent savefiles, as well as capturing and plotting performance metrics during training. 
If you wish to integrate a custom blue agent, it is recommended to create a subclass of the :py:class:`primaite.agents.agent.AgentSessionABC` and implement the ``__init__()``, ``_setup()``, ``_save_checkpoint()``, ``learn()``, ``evaluate()``, ``_get_latest_checkpoint``, ``load()``, ``save()``, and ``export()`` methods. +.. note:: + + If you are planning to implement custom RL agents into PrimAITE, you must use the project as a repository. If you install PrimAITE as a python package from wheel, custom agents are not supported. + +PrimAITE has integration with Ray RLLib and StableBaselines3 agents. All agents interface with PrimAITE through an :py:class:`primaite.agents.agent.AgentSessionABC` which provides Input/Output of agent savefiles, as well as capturing and plotting performance metrics during training and evaluation. If you wish to integrate a custom blue agent, it is recommended to create a subclass of the :py:class:`primaite.agents.agent.AgentSessionABC` and implement the ``__init__()``, ``_setup()``, ``_save_checkpoint()``, ``learn()``, ``evaluate()``, ``_get_latest_checkpoint``, ``load()``, and ``save()`` methods. Below is a barebones example of a custom agent implementation: .. code:: python + # src/primaite/agents/my_custom_agent.py + from primaite.agents.agent import AgentSessionABC from primaite.common.enums import AgentFramework, AgentIdentifier @@ -63,72 +70,69 @@ Below is a barebones example of a custom agent implementation: @classmethod def load(cls, path): ... - # + # Create a CustomAgent object which loads model weights from file. def save(self): ... # Call your agent's function that saves it to a file - def export(self): - ... - # Call your agent's function that exports it to a transportable file format. +You will also need to modify :py:class:`primaite.primaite_session.PrimaiteSession` and :py:mod:`primaite.common.enums` to capture your new agent identifiers. 
-You will also need to modify :py:class:`primaite.primaite_session.PrimaiteSession` class to capture your new agent identifier. +.. code-block:: python + :emphasize-lines: 17, 18 + # src/primaite/common/enums.py + class AgentIdentifier(Enum): + """The Red Agent algo/class.""" + A2C = 1 + "Advantage Actor Critic" + PPO = 2 + "Proximal Policy Optimization" + HARDCODED = 3 + "The Hardcoded agents" + DO_NOTHING = 4 + "The DoNothing agents" + RANDOM = 5 + "The RandomAgent" + DUMMY = 6 + "The DummyAgent" + CUSTOM_AGENT = 7 + "Your custom agent" +.. code-block:: python + :emphasize-lines: 3, 11, 12 + # src/primaite_session.py -The selection of which agent type to use is made via the training config file. In order to train a user generated agent, -the run_generic function should be selected, and should be modified (typically) to be: + from primaite.agents.my_custom_agent import CustomAgent -.. code:: python + # ... - agent = MyAgent(environment, num_steps) - for episode in range(0, num_episodes): - agent.learn() - env.close() - save_agent(agent) + def setup(self): + """Performs the session setup.""" + if self._training_config.agent_framework == AgentFramework.CUSTOM: + _LOGGER.debug(f"PrimaiteSession Setup: Agent Framework = {AgentFramework.CUSTOM}") + if self._training_config.agent_identifier == AgentIdentifier.CUSTOM_AGENT: + self._agent_session = CustomAgent(self._training_config_path, self._lay_down_config_path) + if self._training_config.agent_identifier == AgentIdentifier.HARDCODED: + _LOGGER.debug(f"PrimaiteSession Setup: Agent Identifier =" f" {AgentIdentifier.HARDCODED}") + if self._training_config.action_type == ActionType.NODE: + # Deterministic Hardcoded Agent with Node Action Space + self._agent_session = HardCodedNodeAgent(self._training_config_path, self._lay_down_config_path) -Where: +Finally, specify your agent in your training config. 
-* *MyAgent* is the user created agent -* *environment* is the :class:`~primaite.environment.primaite_env.Primaite` environment -* *num_episodes* is the number of episodes in the session, as defined in the training config file -* *num_steps* is the number of steps in an episode, as defined in the training config file -* the *.learn()* function should be defined in the user created agent -* the *env.close()* function is defined within PrimAITE -* the *save_agent()* assumes that a *save()* function has been defined in the user created agent. If not, this line can - be ommitted (although it is encouraged, since it will allow the agent to be saved and ported) +.. code-block:: yaml -The code below provides a suggested format for the learn() function within the user created agent. -It's important to include the *self.environment.reset()* call within the episode loop in order that the -environment is reset between episodes. Note that the example below should not be considered exhaustive. + # ~/primaite/config/path/to/your/config_main.yaml -.. code:: python + # Training Config File - def learn(self) : + agent_framework: CUSTOM + agent_identifier: CUSTOM_AGENT + random_red_agent: False + # ... - # pre-reqs - - # reset the environment - self.environment.reset() - done = False - - for step in range(max_steps): - # calculate the action - action = ... - - # execute the environment step - new_state, reward, done, info = self.environment.step(action) - - # algorithm updates - ... - - # update to our new state - state = new_state - - # if done, finish episode - if done == True: - break +Now you can `Run a PrimAITE Session` with your custom agent by passing in the custom ``config_main``. 
From 831469d01c59ea14e0a5bdaea859cc2dd92e1991 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 07:16:11 +0000 Subject: [PATCH 12/32] Built matrix conditionally --- .azure/azure-ci-build-pipeline.yaml | 47 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index fe50fb32..bb9c03fa 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -8,30 +8,29 @@ trigger: strategy: matrix: - UbuntuPython38: - python.version: '3.8' - imageName: 'ubuntu-latest' - condition: eq(variables['Build.Reason'], 'PullRequest') - UbuntuPython310: - python.version: '3.10' - imageName: 'ubuntu-latest' - WindowsPython38: - python.version: '3.8' - imageName: 'windows-latest' - condition: eq(variables['Build.Reason'], 'PullRequest') - WindowsPython310: - python.version: '3.10' - imageName: 'windows-latest' - condition: eq(variables['Build.Reason'], 'PullRequest') - MacOSPython38: - python.version: '3.8' - imageName: 'macOS-latest' - condition: eq(variables['Build.Reason'], 'PullRequest') - MacOSPython310: - python.version: '3.10' - imageName: 'macOS-latest' - condition: eq(variables['Build.Reason'], 'PullRequest') - # pretty sure this does not support a 'condition' parameter but worth a try. Otherwise a more complicated solution might be warranted. 
+ ${{ if eq(variables['Build.Reason'], 'PullRequest') }}: + UbuntuPython38: + python.version: '3.8' + imageName: 'ubuntu-latest' + UbuntuPython310: + python.version: '3.10' + imageName: 'ubuntu-latest' + WindowsPython38: + python.version: '3.8' + imageName: 'windows-latest' + WindowsPython310: + python.version: '3.10' + imageName: 'windows-latest' + MacOSPython38: + python.version: '3.8' + imageName: 'macOS-latest' + MacOSPython310: + python.version: '3.10' + imageName: 'macOS-latest' + ${{ if not(eq(variables['Build.Reason'], 'PullRequest')) }}: + UbuntuPython310: + python.version: '3.10' + imageName: 'ubuntu-latest' pool: vmImage: $(imageName) From d8cfbc104211334935f91b383ead0821edc4e8ad Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 07:19:58 +0000 Subject: [PATCH 13/32] Updated azure-ci-build-pipeline.yaml --- .azure/azure-ci-build-pipeline.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index bb9c03fa..9c21577b 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -7,8 +7,8 @@ trigger: - release/* strategy: - matrix: - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}: + ${{ if eq(variables['Build.Reason'], 'PullRequest') }}: + matrix: UbuntuPython38: python.version: '3.8' imageName: 'ubuntu-latest' @@ -27,7 +27,8 @@ strategy: MacOSPython310: python.version: '3.10' imageName: 'macOS-latest' - ${{ if not(eq(variables['Build.Reason'], 'PullRequest')) }}: + ${{ if not(eq(variables['Build.Reason'], 'PullRequest')) }}: + matrix: UbuntuPython310: python.version: '3.10' imageName: 'ubuntu-latest' From 548ecf8e0824da7c161fd6d8da5bf11f7daa651f Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 08:05:38 +0000 Subject: [PATCH 14/32] Edit pipeline to use runtime parameters https://stackoverflow.com/a/70046417 --- .azure/azure-ci-build-pipeline.yaml | 124 +++++++++++++++------------- 1 file 
changed, 66 insertions(+), 58 deletions(-) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index 9c21577b..b1557b7b 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -6,75 +6,83 @@ trigger: - bugfix/* - release/* -strategy: - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}: - matrix: +parameters: + # https://stackoverflow.com/a/70046417 + - name: matrix + type: object + default: UbuntuPython38: - python.version: '3.8' - imageName: 'ubuntu-latest' + py: '3.8' + img: 'ubuntu-latest' + only_pr: false UbuntuPython310: - python.version: '3.10' - imageName: 'ubuntu-latest' + py: '3.10' + img: 'ubuntu-latest' + only_pr: true WindowsPython38: - python.version: '3.8' - imageName: 'windows-latest' + py: '3.8' + img: 'windows-latest' + only_pr: false WindowsPython310: - python.version: '3.10' - imageName: 'windows-latest' + py: '3.10' + img: 'windows-latest' + only_pr: false MacOSPython38: - python.version: '3.8' - imageName: 'macOS-latest' + py: '3.8' + img: 'macOS-latest' + only_pr: false MacOSPython310: - python.version: '3.10' - imageName: 'macOS-latest' - ${{ if not(eq(variables['Build.Reason'], 'PullRequest')) }}: - matrix: - UbuntuPython310: - python.version: '3.10' - imageName: 'ubuntu-latest' + py: '3.10' + img: 'macOS-latest' + only_pr: false -pool: - vmImage: $(imageName) +stages: + - stage: Test + jobs: + - ${{ each item in parameters.matrix }}: + ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), item.value.on_pr) }}: + - job: ${{ item.Key }} + pool: + vmImage: ${{ item.Value.img }} + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: ${{ item.Value.py }} + displayName: 'Use Python ${{ item.Value.py }}' -steps: -- task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - displayName: 'Use Python $(python.version)' + - script: | + python -m pip install pre-commit + pre-commit install + pre-commit run --all-files + displayName: 'Run pre-commits' -- 
script: | - python -m pip install pre-commit - pre-commit install - pre-commit run --all-files - displayName: 'Run pre-commits' + - script: | + python -m pip install --upgrade pip==23.0.1 + pip install wheel==0.38.4 --upgrade + pip install setuptools==66 --upgrade + pip install build==0.10.0 + pip install pytest-azurepipelines + displayName: 'Install build dependencies' -- script: | - python -m pip install --upgrade pip==23.0.1 - pip install wheel==0.38.4 --upgrade - pip install setuptools==66 --upgrade - pip install build==0.10.0 - pip install pytest-azurepipelines - displayName: 'Install build dependencies' + - script: | + python -m build + displayName: 'Build PrimAITE' -- script: | - python -m build - displayName: 'Build PrimAITE' + - script: | + PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) + python -m pip install $PRIMAITE_WHEEL[dev] + displayName: 'Install PrimAITE' + condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) -- script: | - PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) - python -m pip install $PRIMAITE_WHEEL[dev] - displayName: 'Install PrimAITE' - condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) + - script: | + forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" + displayName: 'Install PrimAITE' + condition: eq( variables['Agent.OS'], 'Windows_NT' ) -- script: | - forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" - displayName: 'Install PrimAITE' - condition: eq( variables['Agent.OS'], 'Windows_NT' ) + - script: | + primaite setup + displayName: 'Perform PrimAITE Setup' -- script: | - primaite setup - displayName: 'Perform PrimAITE Setup' - -- script: | - pytest tests/ - displayName: 'Run tests' + - script: | + pytest tests/ + displayName: 'Run tests' From d555584e90a06082e6631a8794890fe8a6fc995c Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 08:08:29 +0000 Subject: [PATCH 15/32] Potentially fix syntax error --- 
.azure/azure-ci-build-pipeline.yaml | 68 ++++++++++++++--------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index b1557b7b..ba2c6d51 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -45,44 +45,44 @@ stages: pool: vmImage: ${{ item.Value.img }} steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: ${{ item.Value.py }} - displayName: 'Use Python ${{ item.Value.py }}' + - task: UsePythonVersion@0 + inputs: + versionSpec: ${{ item.Value.py }} + displayName: 'Use Python ${{ item.Value.py }}' - - script: | - python -m pip install pre-commit - pre-commit install - pre-commit run --all-files - displayName: 'Run pre-commits' + - script: | + python -m pip install pre-commit + pre-commit install + pre-commit run --all-files + displayName: 'Run pre-commits' - - script: | - python -m pip install --upgrade pip==23.0.1 - pip install wheel==0.38.4 --upgrade - pip install setuptools==66 --upgrade - pip install build==0.10.0 - pip install pytest-azurepipelines - displayName: 'Install build dependencies' + - script: | + python -m pip install --upgrade pip==23.0.1 + pip install wheel==0.38.4 --upgrade + pip install setuptools==66 --upgrade + pip install build==0.10.0 + pip install pytest-azurepipelines + displayName: 'Install build dependencies' - - script: | - python -m build - displayName: 'Build PrimAITE' + - script: | + python -m build + displayName: 'Build PrimAITE' - - script: | - PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) - python -m pip install $PRIMAITE_WHEEL[dev] - displayName: 'Install PrimAITE' - condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) + - script: | + PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) + python -m pip install $PRIMAITE_WHEEL[dev] + displayName: 'Install PrimAITE' + condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) - - script: | 
- forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" - displayName: 'Install PrimAITE' - condition: eq( variables['Agent.OS'], 'Windows_NT' ) + - script: | + forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" + displayName: 'Install PrimAITE' + condition: eq( variables['Agent.OS'], 'Windows_NT' ) - - script: | - primaite setup - displayName: 'Perform PrimAITE Setup' + - script: | + primaite setup + displayName: 'Perform PrimAITE Setup' - - script: | - pytest tests/ - displayName: 'Run tests' + - script: | + pytest tests/ + displayName: 'Run tests' From c8191e60ba0c9c1800a5ffc9c41777e7f2576c96 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 08:14:08 +0000 Subject: [PATCH 16/32] Typo in word only --- .azure/azure-ci-build-pipeline.yaml | 78 ++++++++++++++--------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index ba2c6d51..276f0ef1 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -40,49 +40,49 @@ stages: - stage: Test jobs: - ${{ each item in parameters.matrix }}: - ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), item.value.on_pr) }}: - - job: ${{ item.Key }} - pool: - vmImage: ${{ item.Value.img }} - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: ${{ item.Value.py }} - displayName: 'Use Python ${{ item.Value.py }}' + ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), item.value.only_pr) }}: + - job: ${{ item.Key }} + pool: + vmImage: ${{ item.Value.img }} + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: ${{ item.Value.py }} + displayName: 'Use Python ${{ item.Value.py }}' - - script: | - python -m pip install pre-commit - pre-commit install - pre-commit run --all-files - displayName: 'Run pre-commits' + - script: | + python -m pip install pre-commit + pre-commit install + pre-commit run --all-files + displayName: 'Run 
pre-commits' - - script: | - python -m pip install --upgrade pip==23.0.1 - pip install wheel==0.38.4 --upgrade - pip install setuptools==66 --upgrade - pip install build==0.10.0 - pip install pytest-azurepipelines - displayName: 'Install build dependencies' + - script: | + python -m pip install --upgrade pip==23.0.1 + pip install wheel==0.38.4 --upgrade + pip install setuptools==66 --upgrade + pip install build==0.10.0 + pip install pytest-azurepipelines + displayName: 'Install build dependencies' - - script: | - python -m build - displayName: 'Build PrimAITE' + - script: | + python -m build + displayName: 'Build PrimAITE' - - script: | - PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) - python -m pip install $PRIMAITE_WHEEL[dev] - displayName: 'Install PrimAITE' - condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) + - script: | + PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) + python -m pip install $PRIMAITE_WHEEL[dev] + displayName: 'Install PrimAITE' + condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) - - script: | - forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" - displayName: 'Install PrimAITE' - condition: eq( variables['Agent.OS'], 'Windows_NT' ) + - script: | + forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" + displayName: 'Install PrimAITE' + condition: eq( variables['Agent.OS'], 'Windows_NT' ) - - script: | - primaite setup - displayName: 'Perform PrimAITE Setup' + - script: | + primaite setup + displayName: 'Perform PrimAITE Setup' - - script: | - pytest tests/ - displayName: 'Run tests' + - script: | + pytest tests/ + displayName: 'Run tests' From 7f64d06ad42f9230e922c96b42d5649f6ead420a Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 08:14:34 +0000 Subject: [PATCH 17/32] Fix indent --- .azure/azure-ci-build-pipeline.yaml | 78 ++++++++++++++--------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git 
a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index 276f0ef1..e1a26924 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -40,49 +40,49 @@ stages: - stage: Test jobs: - ${{ each item in parameters.matrix }}: - ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), item.value.only_pr) }}: - - job: ${{ item.Key }} - pool: - vmImage: ${{ item.Value.img }} - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: ${{ item.Value.py }} - displayName: 'Use Python ${{ item.Value.py }}' + ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), item.value.only_pr) }}: + - job: ${{ item.Key }} + pool: + vmImage: ${{ item.Value.img }} + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: ${{ item.Value.py }} + displayName: 'Use Python ${{ item.Value.py }}' - - script: | - python -m pip install pre-commit - pre-commit install - pre-commit run --all-files - displayName: 'Run pre-commits' + - script: | + python -m pip install pre-commit + pre-commit install + pre-commit run --all-files + displayName: 'Run pre-commits' - - script: | - python -m pip install --upgrade pip==23.0.1 - pip install wheel==0.38.4 --upgrade - pip install setuptools==66 --upgrade - pip install build==0.10.0 - pip install pytest-azurepipelines - displayName: 'Install build dependencies' + - script: | + python -m pip install --upgrade pip==23.0.1 + pip install wheel==0.38.4 --upgrade + pip install setuptools==66 --upgrade + pip install build==0.10.0 + pip install pytest-azurepipelines + displayName: 'Install build dependencies' - - script: | - python -m build - displayName: 'Build PrimAITE' + - script: | + python -m build + displayName: 'Build PrimAITE' - - script: | - PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) - python -m pip install $PRIMAITE_WHEEL[dev] - displayName: 'Install PrimAITE' - condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) + - script: | + PRIMAITE_WHEEL=$(ls 
./dist/primaite*.whl) + python -m pip install $PRIMAITE_WHEEL[dev] + displayName: 'Install PrimAITE' + condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) - - script: | - forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" - displayName: 'Install PrimAITE' - condition: eq( variables['Agent.OS'], 'Windows_NT' ) + - script: | + forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" + displayName: 'Install PrimAITE' + condition: eq( variables['Agent.OS'], 'Windows_NT' ) - - script: | - primaite setup - displayName: 'Perform PrimAITE Setup' + - script: | + primaite setup + displayName: 'Perform PrimAITE Setup' - - script: | - pytest tests/ - displayName: 'Run tests' + - script: | + pytest tests/ + displayName: 'Run tests' From c641f67914024afdd6600df797bfbf5751f12bf4 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 08:15:16 +0000 Subject: [PATCH 18/32] Capitalisation error in value --- .azure/azure-ci-build-pipeline.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index e1a26924..49fd7174 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -40,7 +40,7 @@ stages: - stage: Test jobs: - ${{ each item in parameters.matrix }}: - ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), item.value.only_pr) }}: + ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), item.Value.only_pr) }}: - job: ${{ item.Key }} pool: vmImage: ${{ item.Value.img }} From 81a8058836ac30259ca570e0c912e2b385cdbc55 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 08:22:30 +0000 Subject: [PATCH 19/32] Change parameter matrix to list instead of dict --- .azure/azure-ci-build-pipeline.yaml | 128 ++++++++++++++-------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/.azure/azure-ci-build-pipeline.yaml 
b/.azure/azure-ci-build-pipeline.yaml index 49fd7174..de760316 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -11,78 +11,78 @@ parameters: - name: matrix type: object default: - UbuntuPython38: - py: '3.8' - img: 'ubuntu-latest' - only_pr: false - UbuntuPython310: - py: '3.10' - img: 'ubuntu-latest' - only_pr: true - WindowsPython38: - py: '3.8' - img: 'windows-latest' - only_pr: false - WindowsPython310: - py: '3.10' - img: 'windows-latest' - only_pr: false - MacOSPython38: - py: '3.8' - img: 'macOS-latest' - only_pr: false - MacOSPython310: - py: '3.10' - img: 'macOS-latest' - only_pr: false + - job_name: 'UbuntuPython38' + py: '3.8' + img: 'ubuntu-latest' + only_pr: false + - job_name: 'UbuntuPython310' + py: '3.10' + img: 'ubuntu-latest' + only_pr: true + - job_name: 'WindowsPython38' + py: '3.8' + img: 'windows-latest' + only_pr: false + - job_name: 'WindowsPython310' + py: '3.10' + img: 'windows-latest' + only_pr: false + - job_name: 'MacOSPython38' + py: '3.8' + img: 'macOS-latest' + only_pr: false + - job_name: 'MacOSPython310' + py: '3.10' + img: 'macOS-latest' + only_pr: false stages: - stage: Test jobs: - - ${{ each item in parameters.matrix }}: - ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), item.Value.only_pr) }}: - - job: ${{ item.Key }} - pool: - vmImage: ${{ item.Value.img }} - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: ${{ item.Value.py }} - displayName: 'Use Python ${{ item.Value.py }}' + - ${{ each item in parameters.matrix }}: + ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), item.only_pr) }}: + - job: ${{ item.job_name }} + pool: + vmImage: ${{ item.img }} + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: ${{ item.py }} + displayName: 'Use Python ${{ item.py }}' - - script: | - python -m pip install pre-commit - pre-commit install - pre-commit run --all-files - displayName: 'Run pre-commits' + - script: | + python -m pip install pre-commit + 
pre-commit install + pre-commit run --all-files + displayName: 'Run pre-commits' - - script: | - python -m pip install --upgrade pip==23.0.1 - pip install wheel==0.38.4 --upgrade - pip install setuptools==66 --upgrade - pip install build==0.10.0 - pip install pytest-azurepipelines - displayName: 'Install build dependencies' + - script: | + python -m pip install --upgrade pip==23.0.1 + pip install wheel==0.38.4 --upgrade + pip install setuptools==66 --upgrade + pip install build==0.10.0 + pip install pytest-azurepipelines + displayName: 'Install build dependencies' - - script: | - python -m build - displayName: 'Build PrimAITE' + - script: | + python -m build + displayName: 'Build PrimAITE' - - script: | - PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) - python -m pip install $PRIMAITE_WHEEL[dev] - displayName: 'Install PrimAITE' - condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) + - script: | + PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) + python -m pip install $PRIMAITE_WHEEL[dev] + displayName: 'Install PrimAITE' + condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) - - script: | - forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" - displayName: 'Install PrimAITE' - condition: eq( variables['Agent.OS'], 'Windows_NT' ) + - script: | + forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" + displayName: 'Install PrimAITE' + condition: eq( variables['Agent.OS'], 'Windows_NT' ) - - script: | - primaite setup - displayName: 'Perform PrimAITE Setup' + - script: | + primaite setup + displayName: 'Perform PrimAITE Setup' - - script: | - pytest tests/ - displayName: 'Run tests' + - script: | + pytest tests/ + displayName: 'Run tests' From a303e9096a8cf6a1d31698a7a9d841b270969317 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 08:53:37 +0000 Subject: [PATCH 20/32] Changed structure of build pipeline yaml --- .azure/azure-ci-build-pipeline.yaml 
| 92 +++++++++++++++-------------- 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index de760316..d8568d46 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -14,75 +14,77 @@ parameters: - job_name: 'UbuntuPython38' py: '3.8' img: 'ubuntu-latest' - only_pr: false + every_time: false - job_name: 'UbuntuPython310' py: '3.10' img: 'ubuntu-latest' - only_pr: true + every_time: true - job_name: 'WindowsPython38' py: '3.8' img: 'windows-latest' - only_pr: false + every_time: false - job_name: 'WindowsPython310' py: '3.10' img: 'windows-latest' - only_pr: false + every_time: false - job_name: 'MacOSPython38' py: '3.8' img: 'macOS-latest' - only_pr: false + every_time: false - job_name: 'MacOSPython310' py: '3.10' img: 'macOS-latest' - only_pr: false + every_time: false stages: - stage: Test jobs: - ${{ each item in parameters.matrix }}: - ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), item.only_pr) }}: - - job: ${{ item.job_name }} - pool: - vmImage: ${{ item.img }} - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: ${{ item.py }} - displayName: 'Use Python ${{ item.py }}' + - job: ${{ item.job_name }} + pool: + vmImage: ${{ item.img }} + + condition: or( eq(variables['BuildReason'], 'PullRequest'), item.every_time ) - - script: | - python -m pip install pre-commit - pre-commit install - pre-commit run --all-files - displayName: 'Run pre-commits' + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: ${{ item.py }} + displayName: 'Use Python ${{ item.py }}' - - script: | - python -m pip install --upgrade pip==23.0.1 - pip install wheel==0.38.4 --upgrade - pip install setuptools==66 --upgrade - pip install build==0.10.0 - pip install pytest-azurepipelines - displayName: 'Install build dependencies' + - script: | + python -m pip install pre-commit + pre-commit install + pre-commit run --all-files + displayName: 
'Run pre-commits' - - script: | - python -m build - displayName: 'Build PrimAITE' + - script: | + python -m pip install --upgrade pip==23.0.1 + pip install wheel==0.38.4 --upgrade + pip install setuptools==66 --upgrade + pip install build==0.10.0 + pip install pytest-azurepipelines + displayName: 'Install build dependencies' - - script: | - PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) - python -m pip install $PRIMAITE_WHEEL[dev] - displayName: 'Install PrimAITE' - condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) + - script: | + python -m build + displayName: 'Build PrimAITE' - - script: | - forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" - displayName: 'Install PrimAITE' - condition: eq( variables['Agent.OS'], 'Windows_NT' ) + - script: | + PRIMAITE_WHEEL=$(ls ./dist/primaite*.whl) + python -m pip install $PRIMAITE_WHEEL[dev] + displayName: 'Install PrimAITE' + condition: or(eq( variables['Agent.OS'], 'Linux' ), eq( variables['Agent.OS'], 'Darwin' )) - - script: | - primaite setup - displayName: 'Perform PrimAITE Setup' + - script: | + forfiles /p dist\ /m *.whl /c "cmd /c python -m pip install @file[dev]" + displayName: 'Install PrimAITE' + condition: eq( variables['Agent.OS'], 'Windows_NT' ) - - script: | - pytest tests/ - displayName: 'Run tests' + - script: | + primaite setup + displayName: 'Perform PrimAITE Setup' + + - script: | + pytest tests/ + displayName: 'Run tests' From dcf5bfddfaee41081a14fd8acc3b622a2f06bace Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 08:54:22 +0000 Subject: [PATCH 21/32] Fix syntax --- .azure/azure-ci-build-pipeline.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index d8568d46..e3f11b7f 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -44,7 +44,7 @@ stages: pool: vmImage: ${{ item.img }} - condition: or( 
eq(variables['BuildReason'], 'PullRequest'), item.every_time ) + condition: or( eq(variables['BuildReason'], 'PullRequest'), ${{ item.every_time }} ) steps: - task: UsePythonVersion@0 From a07ce00852f2803bfcd768755c1bc613eb58e95e Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 09:56:52 +0100 Subject: [PATCH 22/32] Added glossary --- docs/index.rst | 1 + docs/source/glossary.rst | 76 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 docs/source/glossary.rst diff --git a/docs/index.rst b/docs/index.rst index 17dae2c9..4be73154 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -38,6 +38,7 @@ The best place to start is :ref:`about` PrimAITE API PrimAITE Tests source/dependencies + source/glossary .. toctree:: :caption: Project Links: diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst new file mode 100644 index 00000000..6ebf99f9 --- /dev/null +++ b/docs/source/glossary.rst @@ -0,0 +1,76 @@ +Glossary +============= + +.. glossary:: + + Network + The network in primaite is a logical representation of a computer network containing :term:`Node` and :term:`Link`. + + Node + A Node represents a network endpoint. For example a computer, server, switch, or an actuator. + + Link + A Link represents the connection between two Nodes. For example, a physical wire between a computer and a switch or a wireless connection. + + Agent + An agent is a representation of a user of the network. Typically this would be a user that is using one of the computer nodes, though it could be an autonomous agent. + + Red Agent + An agent that is aiming to attack the network in some way, for example by executing a Denial-Of-Service attack or stealing data. + + Blue Agent + A defensive agent that protects the network from Red Agent attacks to minimise disruption to green agents and protect data. + + Green agent + Simulates typical benign activity on the network, such as real users using computers and servers. 
+ + Information Exchange Request (IER) + ... + + Pattern-of-Life (PoL) + ... + + Protocol + ... + + Service + ... + + Gym + ... + + Reward + ... + + Access Control List + ... + + Observation + ... + + Action + ... + + StableBaselines3 + ... + + Ray RLLib + ... + + Episode + ... + + Step + ... + + Reference environment + ... + + Transaction + ... + + Laydown + ... + + User data directory + PrimAITE supports upgrading software version while retaining user data. The user data directory is where configs, notebooks, and results are stored, this location is `~/primaite` on linux/darwin and `C:\Users\\primaite` on Windows. From 6c7ec6216680f7043e30939da7a0b7ecc2312edc Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 09:57:27 +0100 Subject: [PATCH 23/32] Fixed formatting with pre-commit --- .azure/azure-ci-build-pipeline.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index e3f11b7f..4001b1c5 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -43,7 +43,7 @@ stages: - job: ${{ item.job_name }} pool: vmImage: ${{ item.img }} - + condition: or( eq(variables['BuildReason'], 'PullRequest'), ${{ item.every_time }} ) steps: From 1633900ce76cbf9164d1c0a44c78852f49a0c36d Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 09:01:43 +0000 Subject: [PATCH 24/32] Fix typo in `Build.Reason` --- .azure/azure-ci-build-pipeline.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure/azure-ci-build-pipeline.yaml b/.azure/azure-ci-build-pipeline.yaml index 4001b1c5..066c66b2 100644 --- a/.azure/azure-ci-build-pipeline.yaml +++ b/.azure/azure-ci-build-pipeline.yaml @@ -44,7 +44,7 @@ stages: pool: vmImage: ${{ item.img }} - condition: or( eq(variables['BuildReason'], 'PullRequest'), ${{ item.every_time }} ) + condition: or( eq(variables['Build.Reason'], 'PullRequest'), ${{ item.every_time }} ) steps: - 
task: UsePythonVersion@0 From dc2686321688c11926a5db773004bbc9a100685b Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 11:13:28 +0100 Subject: [PATCH 25/32] Completed glossary --- docs/source/glossary.rst | 44 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst index 6ebf99f9..796b6aa1 100644 --- a/docs/source/glossary.rst +++ b/docs/source/glossary.rst @@ -4,7 +4,7 @@ Glossary .. glossary:: Network - The network in primaite is a logical representation of a computer network containing :term:`Node` and :term:`Link`. + The network in primaite is a logical representation of a computer network containing :term:`Nodes` and :term:`Links`. Node A Node represents a network endpoint. For example a computer, server, switch, or an actuator. @@ -24,53 +24,53 @@ Glossary Green agent Simulates typical benign activity on the network, such as real users using computers and servers. - Information Exchange Request (IER) - ... + Information Exchange Requirement (IER) + Simulates network traffic by sending data from one network node to another via links for a specified amount of time. IERs can be part of green agent behaviour or red agent behaviour. PrimAITE can be configured to apply a penalty for green agents' IERs being blocked and a reward for red agents' IERs being blocked. Pattern-of-Life (PoL) - ... + PoLs allow agents to change the current hardware, OS, file system, or service statuses of nodes during the course of an episode. For example, a green agent may restart a server node to represent scheduled maintenance. A red agent's Pattern-of-Life can be used to attack nodes by changing their states to CORRUPTED or COMPROMISED. Protocol - ... + Protocols are used by links to separate different types of network traffic. Common examples would be HTTP, TCP, and UDP. Service - ...
+ A service represents a piece of software that is installed on a node, such as a web server or a database. Gym - ... + PrimAITE uses the Gym reinforcement learning framework API to create a training environment and interface with RL agents. Gym defines a common way of creating observations, actions, and rewards. Reward - ... + The reward is a single number used by the blue agent to understand whether it's performing well or poorly. RL agents change their behaviour in an attempt to increase the expected reward each episode. The reward is generated based on the current state of the environment and is impacted positively by things like green IERs running successfully and negatively by things like nodes being compromised. Access Control List - ... + PrimAITE blocks or allows certain traffic on the network by simulating firewall rules, which are defined in the Access Control List. Observation - ... + An observation is a representation of the current state of the environment that is given to the learning agent so it can decide on which action to perform. If the environment is 'fully observable', the observation contains information about every possible aspect of the environment. More commonly, the environment is 'partially observable' which means the learning agent has to make decisions without knowing every detail of the current environment state. Action - ... + The learning agent decides on an action to take on every step in the simulation. The action has the chance to positively or negatively impact the environment state. Over time, the agent aims to learn which actions to take when to maximise the expected reward. Episode - ... + When an episode starts, the network simulation is reset to an initial state. The agents take actions on each step of the episode until it reaches a terminal state, which usually happens after a predetermined number of steps.
After the terminal state is reached, a new episode starts and the RL agent has another opportunity to protect the network. + + Training + During training, an RL agent is placed in the simulated network and it learns which actions to take in which scenarios to obtain maximum reward. + + Evaluation + During evaluation, an RL agent acts on the simulated network but it is not allowed to update its behaviour. Evaluation is used to assess how successful agents are at defending the network. Step - ... + The agents can only act in the environment at discrete intervals. The time step is the basic unit of time in the simulation. At each step, the RL agent has an opportunity to observe the state of the environment and decide an action. Steps are also used for updating states for time-dependent activities such as rebooting a node. Reference environment - ... + While the network simulation is unfolding, a parallel simulation takes place which is identical to the main one except that blue and red agent actions are not applied. This reference environment essentially shows what would be happening to the network if there had been no cyberattack or defense. The reference environment is used to calculate rewards. Transaction - ... + PrimAITE records the decisions of the learning agent by saving its observation, action, and reward at every time step. During each session, this data is saved to disk to allow for full inspection. Laydown - ... + The laydown is a file which defines the training scenario. It contains the network topology, firewall rules, services, protocols, and details about green and red agent behaviours. User data directory PrimAITE supports upgrading software version while retaining user data. The user data directory is where configs, notebooks, and results are stored, this location is `~/primaite` on linux/darwin and `C:\Users\\primaite` on Windows.
From 9e936513d57fa3ad8c4987a6cf29b7aa8a5555d8 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 11:31:29 +0100 Subject: [PATCH 26/32] Improved order of glossary terms --- docs/source/glossary.rst | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst index 796b6aa1..34e3c8a3 100644 --- a/docs/source/glossary.rst +++ b/docs/source/glossary.rst @@ -2,6 +2,7 @@ Glossary ============= .. glossary:: + :sorted: Network The network in primaite is a logical representation of a computer network containing :term:`Nodes` and :term:`Links`. @@ -12,48 +13,42 @@ Glossary Link A Link represents the connection between two Nodes. For example, a physical wire between a computer and a switch or a wireless connection. + Protocol + Protocols are used by links to separate different types of network traffic. Common examples would be HTTP, TCP, and UDP. + + Service + A service represents a piece of software that is installed on a node, such as a web server or a database. + + Access Control List + PrimAITE blocks or allows certain traffic on the network by simulating firewall rules, which are defined in the Access Control List. + Agent An agent is a representation of a user of the network. Typically this would be a user that is using one of the computer nodes, though it could be an autonomous agent. + Green agent + Simulates typical benign activity on the network, such as real users using computers and servers. + Red Agent An agent that is aiming to attack the network in some way, for example by executing a Denial-Of-Service attack or stealing data. Blue Agent A defensive agent that protects the network from Red Agent attacks to minimise disruption to green agents and protect data. - Green agent - Simulates typical benign activity on the network, such as real users using computers and servers. 
- Information Exchange Requirement (IER) Simulates network traffic by sending data from one network node to another via links for a specified amount of time. IERs can be part of green agent behaviour or red agent behaviour. PrimAITE can be configured to apply a penalty for green agents' IERs being blocked and a reward for red agents' IERs being blocked. Pattern-of-Life (PoL) PoLs allow agents to change the current hardware, OS, file system, or service statuses of nodes during the course of an episode. For example, a green agent may restart a server node to represent scheduled maintenance. A red agent's Pattern-of-Life can be used to attack nodes by changing their states to CORRUPTED or COMPROMISED. - Protocol - Protocols are used by links to separate different types of network traffic. Common examples would be HTTP, TCP, and UDP. - - Service - A service represents a piece of software that is installed on a node, such as a web server or a database. - - Gym - PrimAITE uses the Gym reinforcement learning framework API to create a training environment and interface with RL agents. Gym defines a common way of creating observations, actions, and rewards. - Reward The reward is a single number used by the blue agent to understand whether it's performing well or poorly. RL agents change their behaviour in an attempt to increase the expected reward each episode. The reward is generated based on the current state of the environment and is impacted positively by things like green IERs running successfully and negatively by things like nodes being compromised. - Access Control List - PrimAITE blocks or allows certain traffic on the network by simulating firewall rules, which are defined in the Access Control List. - Observation - An observation is a representation of the current state of the environment that is given to the learning agent so it can decide on which action to perform.
If the environment is 'fully observable', the observation contains information about every possible aspect of the environment. More commonly, the environment is 'partially observable' which means the learning agent has to make decisions without knowing every detail of the current environment state. Action The learning agent decides on an action to take on every step in the simulation. The action has the chance to positively or negatively impact the environment state. Over time, the agent aims to learn which actions to take when to maximise the expected reward. - Episode - When an episode starts, the network simulation is reset to an initial state. The agents take actions on each step of the episode until it reaches a terminal state, which usually happens after a predetermined number of steps. After the terminal state is reached, a new episode starts and the RL agent has another opportunity to protect the network. - Training During training, an RL agent is placed in the simulated network and it learns which actions to take in which scenarios to obtain maximum reward. @@ -63,6 +58,9 @@ Glossary Step The agents can only act in the environment at discrete intervals. The time step is the basic unit of time in the simulation. At each step, the RL agent has an opportunity to observe the state of the environment and decide an action. Steps are also used for updating states for time-dependent activities such as rebooting a node. + Episode + When an episode starts, the network simulation is reset to an initial state. The agents take actions on each step of the episode until it reaches a terminal state, which usually happens after a predetermined number of steps. After the terminal state is reached, a new episode starts and the RL agent has another opportunity to protect the network. + Reference environment While the network simulation is unfolding, a parallel simulation takes place which is identical to the main one except that blue and red agent actions are not applied. 
This reference environment essentially shows what would be happening to the network if there had been no cyberattack or defense. The reference environment is used to calculate rewards. @@ -72,5 +70,8 @@ Glossary Laydown The laydown is a file which defines the training scenario. It contains the network topology, firewall rules, services, protocols, and details about green and red agent behaviours. + Gym + PrimAITE uses the Gym reinforcement learning framework API to create a training environment and interface with RL agents. Gym defines a common way of creating observations, actions, and rewards. + User data directory PrimAITE supports upgrading software version while retaining user data. The user data directory is where configs, notebooks, and results are stored, this location is `~/primaite` on linux/darwin and `C:\Users\\primaite` on Windows. From 5f6bc32b98c5b15aeb1b868bc3531ffcfd5e66a9 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 12:01:48 +0100 Subject: [PATCH 27/32] Added draft migration guide. --- docs/index.rst | 1 + docs/source/migration_1.2_-_2.0.rst | 43 +++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 docs/source/migration_1.2_-_2.0.rst diff --git a/docs/index.rst b/docs/index.rst index 02baa695..fed65919 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -39,6 +39,7 @@ The best place to start is :ref:`about` PrimAITE Tests source/dependencies source/glossary + source/migration_1.2_-_2.0 .. toctree:: :caption: Project Links: diff --git a/docs/source/migration_1.2_-_2.0.rst b/docs/source/migration_1.2_-_2.0.rst new file mode 100644 index 00000000..99cb891b --- /dev/null +++ b/docs/source/migration_1.2_-_2.0.rst @@ -0,0 +1,43 @@ +v1.2 to v2.0 Migration guide +============================ + +**1. Running a training session** + + In version 1.2 of PrimAITE, the main entry point for training or evaluating agents was the ``src/primaite/main.py`` file. 
v2.0.0 introduced managed 'sessions' which are responsible for reading configuration files, performing training, and writing outputs. + + The ``main.py`` file still runs a training session but it now uses the new ``PrimaiteSession``, and it now requires you to provide the path to your config files. + + .. code-block:: bash + + python src/primaite/main.py --tc path/to/training-config.yaml --ldc path/to/laydown-config.yaml + + Alternatively, the session can be invoked via the commandline by running: + + .. code-block:: bash + + primaite session --tc path/to/training-config.yaml --ldc path/to/laydown-config.yaml + +**2. Location of configs** + + In version 1.2, training configs and laydown configs were all stored in the project repository under ``src/primaite/config``. Version 2.0.0 introduced user data directories, and now when you install and setup PrimAITE, config files are stored in your user data location. On Linux/OSX, this is stored in ``~/primaite/config``. On Windows, this is stored in ``C:\Users\\primaite\configs``. Upon first setup, the configs folder is populated with some default yaml files. It is recommended that you store all your custom configuration files here. + +**3. Contents of configs** + + Some things that were previously part of the laydown config are now part of the training config. + + * Actions + + If you have custom configs which use these, you will need to adapt them by moving the configuration from the laydown config to the training config. + + Also, there are new configurable items in the training config: + + * Observations + * Agent framework + * Agent + * Deep learning framework + * random red agents + * seed + * deterministic + * hard coded agent view + + Each of these items has default values which are designed so that PrimAITE has the same behaviour as it did in 1.2.0, so you do not have to specify them.
From 6a888d2efeeab68dca4f0b5d9ef3b7afc912a446 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 11 Jul 2023 12:10:20 +0100 Subject: [PATCH 28/32] Updated migration guide --- docs/source/migration_1.2_-_2.0.rst | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/source/migration_1.2_-_2.0.rst b/docs/source/migration_1.2_-_2.0.rst index 99cb891b..2adf9656 100644 --- a/docs/source/migration_1.2_-_2.0.rst +++ b/docs/source/migration_1.2_-_2.0.rst @@ -1,7 +1,15 @@ v1.2 to v2.0 Migration guide ============================ -**1. Running a training session** +**1. Installing PrimAITE** + + Like before, you can install primaite from the repository by running ``pip install -e .``. But, there is now an additional setup step which does several things, like setting up user directories, copy default configs and notebooks, etc. Once you have installed PrimAITE to your virtual environment, run this command to finalise setup. + + .. code-block:: bash + + primaite setup + +**2. Running a training session** In version 1.2 of PrimAITE, the main entry point for training or evaluating agents was the ``src/primaite/main.py`` file. v2.0.0 introduced managed 'sessions' which are responsible for reading configuration files, performing training, and writing outputs. @@ -17,11 +25,11 @@ v1.2 to v2.0 Migration guide primaite session --tc path/to/training-config.yaml --ldc path/to/laydown-config.yaml -**2. Location of configs** +**3. Location of configs** In version 1.2, training configs and laydown configs were all stored in the project repository under ``src/primaite/config``. Version 2.0.0 introduced user data directories, and now when you install and setup PrimAITE, config files are stored in your user data location. On Linux/OSX, this is stored in ``~/primaite/config``. On Windows, this is stored in ``C:\Users\\primaite\configs``. Upon first setup, the configs folder is populated with some default yaml files. 
It is recommended that you store all your custom configuration files here. -**3. Contents of configs** +**4. Contents of configs** Some things that were previously part of the laydown config are now part of the training config. From f3750032bea2a5c600559fa4910e3031c41fdfdd Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Tue, 11 Jul 2023 12:37:14 +0100 Subject: [PATCH 29/32] 1566 - applied pre-commit --- tests/test_train_eval_episode_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py index daa93055..b839e630 100644 --- a/tests/test_train_eval_episode_steps.py +++ b/tests/test_train_eval_episode_steps.py @@ -22,8 +22,8 @@ def test_eval_steps_differ_from_training(temp_primaite_session): num_eval_episodes = 1 """ expected_learning_metadata = {"total_episodes": 3, "total_time_steps": 75} - expected_evaluation_metadata = {"total_episodes": 1, "total_time_steps": 17} + with temp_primaite_session as session: # Run learning and check episode and step counts session.learn() From 585d35338f96c7354af96f140674cfc6d5faf9ac Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Tue, 11 Jul 2023 12:40:25 +0100 Subject: [PATCH 30/32] 1566 - updated docs for new items in training_config --- docs/source/config.rst | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index a28f0ec1..af590a24 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -83,13 +83,24 @@ The environment config file consists of the following attributes: The other configurable item is ``flatten`` which is false by default. When set to true, the observation space is flattened (turned into a 1-D vector). You should use this if your RL agent does not natively support observation space types like ``gym.Spaces.Tuple``.
-* **num_episodes** [int] +* **num_train_episodes** [int] - This defines the number of episodes that the agent will train or be evaluated over. + This defines the number of episodes that the agent will train for. -* **num_steps** [int] - Determines the number of steps to run in each episode of the session +* **num_train_steps** [int] + + Determines the number of steps to run in each episode of the training session. + + +* **num_eval_episodes** [int] + + This defines the number of episodes that the agent will be evaluated over. + + +* **num_eval_steps** [int] + + Determines the number of steps to run in each episode of the evaluation session. * **time_delay** [int] From c7547f715edb1f8e14e142330b5a368808fa716a Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Wed, 12 Jul 2023 09:16:40 +0100 Subject: [PATCH 31/32] Add better hyperlinks --- docs/source/custom_agent.rst | 2 +- docs/source/glossary.rst | 2 +- docs/source/primaite_session.rst | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/source/custom_agent.rst b/docs/source/custom_agent.rst index 45d1c5a4..b4552d64 100644 --- a/docs/source/custom_agent.rst +++ b/docs/source/custom_agent.rst @@ -135,4 +135,4 @@ Finally, specify your agent in your training config. random_red_agent: False # ... -Now you can `Run a PrimAITE Session` with your custom agent by passing in the custom ``config_main``. +Now you can :ref:`run a primaite session` with your custom agent by passing in the custom ``config_main``. diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst index 34e3c8a3..58b4cd5e 100644 --- a/docs/source/glossary.rst +++ b/docs/source/glossary.rst @@ -41,7 +41,7 @@ Glossary PoLs allow agents to change the current hardware, OS, file system, or service statuses of nodes during the course of an episode. For example, a green agent may restart a server node to represent scheduled maintainance. 
A red agent's Pattern-of-Life can be used to attack nodes by changing their states to CORRUPTED or COMPROMISED. Reward - The reward is a single number used by the blue agent to understand whether it's performing well or poorly. RL agents change their behaviour in an attempt to increase the expected reward each episode. The reward is generated based on the current state of the environment and is impacted positively by things like green IERS running successfully and negatively by things like nodes being compromised. + The reward is a single number used by the blue agent to understand whether it's performing well or poorly. RL agents change their behaviour in an attempt to increase the expected reward each episode. The reward is generated based on the current states of the environment / :term:`reference environment` and is impacted positively by things like green IERS running successfully and negatively by things like nodes being compromised. Observation An observation is a representation of the current state of the environment that is given to the learning agent so it can decide on which action to perform. If the environment is 'fully observable', the observation contains information about every possible aspect of the environment. More commonly, the environment is 'partially observable' which means the learning agent has to make decisions without knowing every detail of the current environment state. diff --git a/docs/source/primaite_session.rst b/docs/source/primaite_session.rst index 1b48494a..a393093c 100644 --- a/docs/source/primaite_session.rst +++ b/docs/source/primaite_session.rst @@ -1,3 +1,5 @@ +.. 
_run a primaite session: + Run a PrimAITE Session ====================== From 96b48aad796b627034150f515a73bb27dd64d722 Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Wed, 12 Jul 2023 09:52:54 +0100 Subject: [PATCH 32/32] 1566 - removed redundant config file --- tests/config/training_config_main.yaml | 153 ------------------------- 1 file changed, 153 deletions(-) delete mode 100644 tests/config/training_config_main.yaml diff --git a/tests/config/training_config_main.yaml b/tests/config/training_config_main.yaml deleted file mode 100644 index 3351d66b..00000000 --- a/tests/config/training_config_main.yaml +++ /dev/null @@ -1,153 +0,0 @@ -# Training Config File - -# Sets which agent algorithm framework will be used. -# Options are: -# "SB3" (Stable Baselines3) -# "RLLIB" (Ray RLlib) -# "CUSTOM" (Custom Agent) -agent_framework: SB3 - -# Sets which deep learning framework will be used (by RLlib ONLY). -# Default is TF (Tensorflow). -# Options are: -# "TF" (Tensorflow) -# TF2 (Tensorflow 2.X) -# TORCH (PyTorch) -deep_learning_framework: TF2 - -# Sets which Agent class will be used. -# Options are: -# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) -# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) -# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) -# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) -# "RANDOM" (primaite.agents.simple.RandomAgent) -# "DUMMY" (primaite.agents.simple.DummyAgent) -agent_identifier: PPO - -# Sets whether Red Agent POL and IER is randomised. -# Options are: -# True -# False -random_red_agent: False - -# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
-# Options are: -# "BASIC" (The current observation space only) -# "FULL" (Full environment view with actions taken and reward feedback) -hard_coded_agent_view: FULL - -# Sets How the Action Space is defined: -# "NODE" -# "ACL" -# "ANY" node and acl actions -action_type: NODE -# observation space -observation_space: - # flatten: true - components: - - name: NODE_LINK_TABLE - # - name: NODE_STATUSES - # - name: LINK_TRAFFIC_LEVELS - - -# Number of episodes for training to run per session -num_train_episodes: 10 - -# Number of time_steps for training per episode -num_train_steps: 256 - -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 - -# Sets how often the agent will save a checkpoint (every n time episodes). -# Set to 0 if no checkpoints are required. Default is 10 -checkpoint_every_n_episodes: 10 - -# Time delay (milliseconds) between steps for CUSTOM agents. -time_delay: 5 - -# Type of session to be run. 
Options are: -# "TRAIN" (Trains an agent) -# "EVAL" (Evaluates an agent) -# "TRAIN_EVAL" (Trains then evaluates an agent) -session_type: TRAIN - -# Environment config values -# The high value for the observation space -observation_space_high_value: 1000000000 - -# The Stable Baselines3 learn/eval output verbosity level: -# Options are: -# "NONE" (No Output) -# "INFO" (Info Messages (such as devices and wrappers used)) -# "DEBUG" (All Messages) -sb3_output_verbose_level: NONE - -# Reward values -# Generic -all_ok: 0 -# Node Hardware State -off_should_be_on: -10 -off_should_be_resetting: -5 -on_should_be_off: -2 -on_should_be_resetting: -5 -resetting_should_be_on: -5 -resetting_should_be_off: -2 -resetting: -3 -# Node Software or Service State -good_should_be_patching: 2 -good_should_be_compromised: 5 -good_should_be_overwhelmed: 5 -patching_should_be_good: -5 -patching_should_be_compromised: 2 -patching_should_be_overwhelmed: 2 -patching: -3 -compromised_should_be_good: -20 -compromised_should_be_patching: -20 -compromised_should_be_overwhelmed: -20 -compromised: -20 -overwhelmed_should_be_good: -20 -overwhelmed_should_be_patching: -20 -overwhelmed_should_be_compromised: -20 -overwhelmed: -20 -# Node File System State -good_should_be_repairing: 2 -good_should_be_restoring: 2 -good_should_be_corrupt: 5 -good_should_be_destroyed: 10 -repairing_should_be_good: -5 -repairing_should_be_restoring: 2 -repairing_should_be_corrupt: 2 -repairing_should_be_destroyed: 0 -repairing: -3 -restoring_should_be_good: -10 -restoring_should_be_repairing: -2 -restoring_should_be_corrupt: 1 -restoring_should_be_destroyed: 2 -restoring: -6 -corrupt_should_be_good: -10 -corrupt_should_be_repairing: -10 -corrupt_should_be_restoring: -10 -corrupt_should_be_destroyed: 2 -corrupt: -10 -destroyed_should_be_good: -20 -destroyed_should_be_repairing: -20 -destroyed_should_be_restoring: -20 -destroyed_should_be_corrupt: -20 -destroyed: -20 -scanning: -2 -# IER status -red_ier_running: -5 
-green_ier_blocked: -10 - -# Patching / Reset durations -os_patching_duration: 5 # The time taken to patch the OS -node_reset_duration: 5 # The time taken to reset a node (hardware) -service_patching_duration: 5 # The time taken to patch a service -file_system_repairing_limit: 5 # The time take to repair the file system -file_system_restoring_limit: 5 # The time take to restore the file system -file_system_scanning_limit: 5 # The time taken to scan the file system