From 4371ca13fc0b655aeb2d4d20b365ef400f5db68b Mon Sep 17 00:00:00 2001
From: SunilSamra <sunil-samra@nsc.co.uk>
Date: Thu, 6 Jul 2023 11:12:51 +0100
Subject: [PATCH 1/9] 1566 - added train_episodes, train_steps, eval_episodes
 and eval_steps to training_config_main.yaml

---
 .../training/training_config_main.yaml           | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/primaite/config/_package_data/training/training_config_main.yaml b/src/primaite/config/_package_data/training/training_config_main.yaml
index a638fe14..dc94e3bb 100644
--- a/src/primaite/config/_package_data/training/training_config_main.yaml
+++ b/src/primaite/config/_package_data/training/training_config_main.yaml
@@ -49,11 +49,19 @@ observation_space:
     - name: NODE_LINK_TABLE
     # - name: NODE_STATUSES
     # - name: LINK_TRAFFIC_LEVELS
-# Number of episodes to run per session
-num_episodes: 10
 
-# Number of time_steps per episode
-num_steps: 256
+
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 
 # Sets how often the agent will save a checkpoint (every n time episodes).
 # Set to 0 if no checkpoints are required. Default is 10

From e03c29b921705e127b6cd1d844af9df4927950f7 Mon Sep 17 00:00:00 2001
From: SunilSamra <sunil-samra@nsc.co.uk>
Date: Fri, 7 Jul 2023 14:13:47 +0100
Subject: [PATCH 2/9] 1566 - added test file and edited configs to include
 types of num steps and modified agents to use correct step and episode counts

---
 src/primaite/agents/agent.py                  |   4 +-
 src/primaite/agents/rllib.py                  |  10 +-
 src/primaite/agents/sb3.py                    |  11 +-
 src/primaite/config/training_config.py        |  30 +++-
 src/primaite/environment/primaite_env.py      |   8 +-
 .../new_training_config.yaml                  |  15 +-
 .../main_config_LINK_TRAFFIC_LEVELS.yaml      |  15 +-
 .../main_config_NODE_LINK_TABLE.yaml          |  15 +-
 .../obs_tests/main_config_NODE_STATUSES.yaml  |  15 +-
 .../obs_tests/main_config_without_obs.yaml    |  15 +-
 .../one_node_states_on_off_main_config.yaml   |  15 +-
 ..._space_fixed_blue_actions_main_config.yaml |  15 +-
 .../single_action_space_main_config.yaml      |  15 +-
 tests/config/test_random_red_main_config.yaml |  15 +-
 tests/config/train_episode_step.yaml          | 153 ++++++++++++++++++
 .../config/train_eval_check_episode_step.yaml | 153 ++++++++++++++++++
 tests/config/training_config_main.yaml        | 153 ++++++++++++++++++
 tests/conftest.py                             |   5 +-
 tests/test_single_action_space.py             |   4 +-
 tests/test_train_eval_episode_steps.py        |  46 ++++++
 20 files changed, 652 insertions(+), 60 deletions(-)
 create mode 100644 tests/config/train_episode_step.yaml
 create mode 100644 tests/config/train_eval_check_episode_step.yaml
 create mode 100644 tests/config/training_config_main.yaml
 create mode 100644 tests/test_train_eval_episode_steps.py

diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py
index 685fe776..4eb398b4 100644
--- a/src/primaite/agents/agent.py
+++ b/src/primaite/agents/agent.py
@@ -348,8 +348,8 @@ class HardCodedAgentSessionABC(AgentSessionABC):
         self._env.set_as_eval()  # noqa
         self.is_eval = True
 
-        time_steps = self._training_config.num_steps
-        episodes = self._training_config.num_episodes
+        time_steps = self._training_config.num_eval_steps
+        episodes = self._training_config.num_eval_episodes
 
         obs = self._env.reset()
         for episode in range(episodes):
diff --git a/src/primaite/agents/rllib.py b/src/primaite/agents/rllib.py
index d851ba9c..443598e7 100644
--- a/src/primaite/agents/rllib.py
+++ b/src/primaite/agents/rllib.py
@@ -107,13 +107,13 @@ class RLlibAgent(AgentSessionABC):
             ),
         )
 
-        self._agent_config.training(train_batch_size=self._training_config.num_steps)
+        self._agent_config.training(train_batch_size=self._training_config.num_train_steps)
         self._agent_config.framework(framework="tf")
 
         self._agent_config.rollouts(
             num_rollout_workers=1,
             num_envs_per_worker=1,
-            horizon=self._training_config.num_steps,
+            horizon=self._training_config.num_train_steps,
         )
         self._agent: Algorithm = self._agent_config.build(logger_creator=_custom_log_creator(self.learning_path))
 
@@ -121,7 +121,7 @@ class RLlibAgent(AgentSessionABC):
         checkpoint_n = self._training_config.checkpoint_every_n_episodes
         episode_count = self._current_result["episodes_total"]
         if checkpoint_n > 0 and episode_count > 0:
-            if (episode_count % checkpoint_n == 0) or (episode_count == self._training_config.num_episodes):
+            if (episode_count % checkpoint_n == 0) or (episode_count == self._training_config.num_train_episodes):
                 self._agent.save(str(self.checkpoints_path))
 
     def learn(
@@ -133,8 +133,8 @@ class RLlibAgent(AgentSessionABC):
 
         :param kwargs: Any agent-specific key-word args to be passed.
         """
-        time_steps = self._training_config.num_steps
-        episodes = self._training_config.num_episodes
+        time_steps = self._training_config.num_train_steps
+        episodes = self._training_config.num_train_episodes
 
         _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...")
         for i in range(episodes):
diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py
index f5ac44cb..17827ff4 100644
--- a/src/primaite/agents/sb3.py
+++ b/src/primaite/agents/sb3.py
@@ -53,11 +53,12 @@ class SB3Agent(AgentSessionABC):
             session_path=self.session_path,
             timestamp_str=self.timestamp_str,
         )
+
         self._agent = self._agent_class(
             PPOMlp,
             self._env,
             verbose=self.sb3_output_verbose_level,
-            n_steps=self._training_config.num_steps,
+            n_steps=self._training_config.num_train_steps,  # rollout length is a training setting
             tensorboard_log=str(self._tensorboard_log_path),
         )
 
@@ -82,8 +83,8 @@ class SB3Agent(AgentSessionABC):
 
         :param kwargs: Any agent-specific key-word args to be passed.
         """
-        time_steps = self._training_config.num_steps
-        episodes = self._training_config.num_episodes
+        time_steps = self._training_config.num_train_steps
+        episodes = self._training_config.num_train_episodes
         self.is_eval = False
         _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...")
         for i in range(episodes):
@@ -104,8 +105,8 @@ class SB3Agent(AgentSessionABC):
         :param deterministic: Whether the evaluation is deterministic.
         :param kwargs: Any agent-specific key-word args to be passed.
         """
-        time_steps = self._training_config.num_steps
-        episodes = self._training_config.num_episodes
+        time_steps = self._training_config.num_eval_steps
+        episodes = self._training_config.num_eval_episodes
         self._env.set_as_eval()
         self.is_eval = True
         if deterministic:
diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py
index bd73f65b..018fd982 100644
--- a/src/primaite/config/training_config.py
+++ b/src/primaite/config/training_config.py
@@ -60,11 +60,17 @@ class TrainingConfig:
     action_type: ActionType = ActionType.ANY
     "The ActionType to use"
 
-    num_episodes: int = 10
-    "The number of episodes to train over"
+    num_train_episodes: int = 10
+    "The number of episodes to train over during a training session"
 
-    num_steps: int = 256
-    "The number of steps in an episode"
+    num_train_steps: int = 256
+    "The number of steps in an episode during a training session"
+
+    num_eval_episodes: int = 10
+    "The number of episodes to evaluate over during an evaluation session"
+
+    num_eval_steps: int = 256
+    "The number of steps in an episode during an evaluation session"
 
     checkpoint_every_n_episodes: int = 5
     "The agent will save a checkpoint every n episodes"
@@ -230,8 +236,17 @@ class TrainingConfig:
             tc += f"{self.hard_coded_agent_view}, "
         tc += f"{self.action_type}, "
         tc += f"observation_space={self.observation_space}, "
-        tc += f"{self.num_episodes} episodes @ "
-        tc += f"{self.num_steps} steps"
+        if self.session_type.name == "TRAIN":
+            tc += f"{self.num_train_episodes} episodes @ "
+            tc += f"{self.num_train_steps} steps"
+        elif self.session_type.name == "EVAL":
+            tc += f"{self.num_eval_episodes} episodes @ "
+            tc += f"{self.num_eval_steps} steps"
+        else:  # any other session type (e.g. TRAIN_EVAL): report both phases
+            tc += f"Training: {self.num_train_episodes} episodes @ "
+            tc += f"{self.num_train_steps} steps, "
+            tc += f"Evaluation: {self.num_eval_episodes} episodes @ "
+            tc += f"{self.num_eval_steps} steps"
         return tc
 
 
@@ -320,7 +335,8 @@ def _get_new_key_from_legacy(legacy_key: str) -> str:
     """
     key_mapping = {
         "agentIdentifier": None,
-        "numEpisodes": "num_episodes",
+        "numEpisodes": "num_train_episodes",
+        "numSteps": "num_train_steps",
         "timeDelay": "time_delay",
         "configFilename": None,
         "sessionType": "session_type",
diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py
index 03c23f93..c7e67e34 100644
--- a/src/primaite/environment/primaite_env.py
+++ b/src/primaite/environment/primaite_env.py
@@ -85,7 +85,12 @@ class Primaite(Env):
         _LOGGER.info(f"Using: {str(self.training_config)}")
 
         # Number of steps in an episode
-        self.episode_steps = self.training_config.num_steps
+        if self.training_config.session_type == SessionType.TRAIN:
+            self.episode_steps = self.training_config.num_train_steps
+        elif self.training_config.session_type == SessionType.EVAL:
+            self.episode_steps = self.training_config.num_eval_steps
+        else:
+            self.episode_steps = self.training_config.num_train_steps
 
         super(Primaite, self).__init__()
 
@@ -254,6 +259,7 @@ class Primaite(Env):
         self.episode_count = 0
         self.step_count = 0
         self.total_step_count = 0
+        self.episode_steps = self.training_config.num_eval_steps
 
     def reset(self):
         """
diff --git a/tests/config/legacy_conversion/new_training_config.yaml b/tests/config/legacy_conversion/new_training_config.yaml
index 49e6a00b..5ca80742 100644
--- a/tests/config/legacy_conversion/new_training_config.yaml
+++ b/tests/config/legacy_conversion/new_training_config.yaml
@@ -20,10 +20,17 @@ agent_identifier: PPO
 # "ACL"
 # "ANY" node and acl actions
 action_type: ANY
-# Number of episodes to run per session
-num_episodes: 10
-# Number of time_steps per episode
-num_steps: 256
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 # Time delay between steps (for generic agents)
 time_delay: 10
 # Type of session to be run (TRAINING or EVALUATION)
diff --git a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml
index d26d7955..bbdce9c1 100644
--- a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml
+++ b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml
@@ -22,10 +22,17 @@ agent_identifier: A2C
 # "ACL"
 # "ANY" node and acl actions
 action_type: ANY
-# Number of episodes to run per session
-num_episodes: 1
-# Number of time_steps per episode
-num_steps: 5
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 
 
 observation_space:
diff --git a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml
index aae740b6..41b3e588 100644
--- a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml
+++ b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml
@@ -22,10 +22,17 @@ agent_identifier: RANDOM
 # "ACL"
 # "ANY" node and acl actions
 action_type: ANY
-# Number of episodes to run per session
-num_episodes: 1
-# Number of time_steps per episode
-num_steps: 5
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 
 observation_space:
   components:
diff --git a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml
index 4066eace..34758199 100644
--- a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml
+++ b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml
@@ -22,10 +22,17 @@ agent_identifier: RANDOM
 # "ACL"
 # "ANY" node and acl actions
 action_type: ANY
-# Number of episodes to run per session
-num_episodes: 1
-# Number of time_steps per episode
-num_steps: 5
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 
 observation_space:
   components:
diff --git a/tests/config/obs_tests/main_config_without_obs.yaml b/tests/config/obs_tests/main_config_without_obs.yaml
index 08452dda..352e765c 100644
--- a/tests/config/obs_tests/main_config_without_obs.yaml
+++ b/tests/config/obs_tests/main_config_without_obs.yaml
@@ -22,10 +22,17 @@ agent_identifier: RANDOM
 # "ACL"
 # "ANY" node and acl actions
 action_type: ANY
-# Number of episodes to run per session
-num_episodes: 1
-# Number of time_steps per episode
-num_steps: 5
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 # Time delay between steps (for generic agents)
 time_delay: 1
 # Type of session to be run (TRAINING or EVALUATION)
diff --git a/tests/config/one_node_states_on_off_main_config.yaml b/tests/config/one_node_states_on_off_main_config.yaml
index 7f1ced01..63fdd1a5 100644
--- a/tests/config/one_node_states_on_off_main_config.yaml
+++ b/tests/config/one_node_states_on_off_main_config.yaml
@@ -22,10 +22,17 @@ agent_identifier: DUMMY
 # "ACL"
 # "ANY" node and acl actions
 action_type: NODE
-# Number of episodes to run per session
-num_episodes: 1
-# Number of time_steps per episode
-num_steps: 15
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 # Time delay between steps (for generic agents)
 time_delay: 1
 
diff --git a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml
index 97d0ddaf..859b2ab3 100644
--- a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml
+++ b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml
@@ -22,10 +22,17 @@ agent_identifier: RANDOM
 # "ACL"
 # "ANY" node and acl actions
 action_type: ANY
-# Number of episodes to run per session
-num_episodes: 1
-# Number of time_steps per episode
-num_steps: 15
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 # Time delay between steps (for generic agents)
 time_delay: 1
 # Type of session to be run (TRAINING or EVALUATION)
diff --git a/tests/config/single_action_space_main_config.yaml b/tests/config/single_action_space_main_config.yaml
index 067b9a6d..c875757f 100644
--- a/tests/config/single_action_space_main_config.yaml
+++ b/tests/config/single_action_space_main_config.yaml
@@ -22,10 +22,17 @@ agent_identifier: RANDOM
 # "ACL"
 # "ANY" node and acl actions
 action_type: ANY
-# Number of episodes to run per session
-num_episodes: 1
-# Number of time_steps per episode
-num_steps: 5
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 # Time delay between steps (for generic agents)
 time_delay: 1
 # Type of session to be run (TRAINING or EVALUATION)
diff --git a/tests/config/test_random_red_main_config.yaml b/tests/config/test_random_red_main_config.yaml
index 800fe808..e0fc40ee 100644
--- a/tests/config/test_random_red_main_config.yaml
+++ b/tests/config/test_random_red_main_config.yaml
@@ -28,10 +28,17 @@ random_red_agent: True
 # "ACL"
 # "ANY" node and acl actions
 action_type: NODE
-# Number of episodes to run per session
-num_episodes: 2
-# Number of time_steps per episode
-num_steps: 15
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 # Time delay between steps (for generic agents)
 time_delay: 1
 
diff --git a/tests/config/train_episode_step.yaml b/tests/config/train_episode_step.yaml
new file mode 100644
index 00000000..550b95fd
--- /dev/null
+++ b/tests/config/train_episode_step.yaml
@@ -0,0 +1,153 @@
+# Training Config File
+
+# Sets which agent algorithm framework will be used.
+# Options are:
+# "SB3" (Stable Baselines3)
+# "RLLIB" (Ray RLlib)
+# "CUSTOM" (Custom Agent)
+agent_framework: SB3
+
+# Sets which deep learning framework will be used (by RLlib ONLY).
+# Default is TF (Tensorflow).
+# Options are:
+# "TF" (Tensorflow)
+# TF2 (Tensorflow 2.X)
+# TORCH (PyTorch)
+deep_learning_framework: TF2
+
+# Sets which Agent class will be used.
+# Options are:
+# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
+# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
+# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
+# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
+# "RANDOM" (primaite.agents.simple.RandomAgent)
+# "DUMMY" (primaite.agents.simple.DummyAgent)
+agent_identifier: PPO
+
+# Sets whether Red Agent POL and IER is randomised.
+# Options are:
+# True
+# False
+random_red_agent: False
+
+# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
+# Options are:
+# "BASIC" (The current observation space only)
+# "FULL" (Full environment view with actions taken and reward feedback)
+hard_coded_agent_view: FULL
+
+# Sets How the Action Space is defined:
+# "NODE"
+# "ACL"
+# "ANY" node and acl actions
+action_type: NODE
+# observation space
+observation_space:
+  # flatten: true
+  components:
+    - name: NODE_LINK_TABLE
+    # - name: NODE_STATUSES
+    # - name: LINK_TRAFFIC_LEVELS
+
+
+# Number of episodes for training to run per session
+num_train_episodes: 30
+
+# Number of time_steps for training per episode
+num_train_steps: 1
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 10
+
+# Sets how often the agent will save a checkpoint (every n time episodes).
+# Set to 0 if no checkpoints are required. Default is 10
+checkpoint_every_n_episodes: 10
+
+# Time delay (milliseconds) between steps for CUSTOM agents.
+time_delay: 5
+
+# Type of session to be run. Options are:
+# "TRAIN" (Trains an agent)
+# "EVAL" (Evaluates an agent)
+# "TRAIN_EVAL" (Trains then evaluates an agent)
+session_type: EVAL
+
+# Environment config values
+# The high value for the observation space
+observation_space_high_value: 1000000000
+
+# The Stable Baselines3 learn/eval output verbosity level:
+# Options are:
+# "NONE" (No Output)
+# "INFO" (Info Messages (such as devices and wrappers used))
+# "DEBUG" (All Messages)
+sb3_output_verbose_level: NONE
+
+# Reward values
+# Generic
+all_ok: 0
+# Node Hardware State
+off_should_be_on: -10
+off_should_be_resetting: -5
+on_should_be_off: -2
+on_should_be_resetting: -5
+resetting_should_be_on: -5
+resetting_should_be_off: -2
+resetting: -3
+# Node Software or Service State
+good_should_be_patching: 2
+good_should_be_compromised: 5
+good_should_be_overwhelmed: 5
+patching_should_be_good: -5
+patching_should_be_compromised: 2
+patching_should_be_overwhelmed: 2
+patching: -3
+compromised_should_be_good: -20
+compromised_should_be_patching: -20
+compromised_should_be_overwhelmed: -20
+compromised: -20
+overwhelmed_should_be_good: -20
+overwhelmed_should_be_patching: -20
+overwhelmed_should_be_compromised: -20
+overwhelmed: -20
+# Node File System State
+good_should_be_repairing: 2
+good_should_be_restoring: 2
+good_should_be_corrupt: 5
+good_should_be_destroyed: 10
+repairing_should_be_good: -5
+repairing_should_be_restoring: 2
+repairing_should_be_corrupt: 2
+repairing_should_be_destroyed: 0
+repairing: -3
+restoring_should_be_good: -10
+restoring_should_be_repairing: -2
+restoring_should_be_corrupt: 1
+restoring_should_be_destroyed: 2
+restoring: -6
+corrupt_should_be_good: -10
+corrupt_should_be_repairing: -10
+corrupt_should_be_restoring: -10
+corrupt_should_be_destroyed: 2
+corrupt: -10
+destroyed_should_be_good: -20
+destroyed_should_be_repairing: -20
+destroyed_should_be_restoring: -20
+destroyed_should_be_corrupt: -20
+destroyed: -20
+scanning: -2
+# IER status
+red_ier_running: -5
+green_ier_blocked: -10
+
+# Patching / Reset durations
+os_patching_duration: 5            # The time taken to patch the OS
+node_reset_duration: 5             # The time taken to reset a node (hardware)
+service_patching_duration: 5       # The time taken to patch a service
+file_system_repairing_limit: 5      # The time taken to repair the file system
+file_system_restoring_limit: 5      # The time taken to restore the file system
+file_system_scanning_limit: 5       # The time taken to scan the file system
diff --git a/tests/config/train_eval_check_episode_step.yaml b/tests/config/train_eval_check_episode_step.yaml
new file mode 100644
index 00000000..f616116e
--- /dev/null
+++ b/tests/config/train_eval_check_episode_step.yaml
@@ -0,0 +1,153 @@
+# Training Config File
+
+# Sets which agent algorithm framework will be used.
+# Options are:
+# "SB3" (Stable Baselines3)
+# "RLLIB" (Ray RLlib)
+# "CUSTOM" (Custom Agent)
+agent_framework: SB3
+
+# Sets which deep learning framework will be used (by RLlib ONLY).
+# Default is TF (Tensorflow).
+# Options are:
+# "TF" (Tensorflow)
+# TF2 (Tensorflow 2.X)
+# TORCH (PyTorch)
+deep_learning_framework: TF2
+
+# Sets which Agent class will be used.
+# Options are:
+# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
+# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
+# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
+# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
+# "RANDOM" (primaite.agents.simple.RandomAgent)
+# "DUMMY" (primaite.agents.simple.DummyAgent)
+agent_identifier: PPO
+
+# Sets whether Red Agent POL and IER is randomised.
+# Options are:
+# True
+# False
+random_red_agent: False
+
+# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
+# Options are:
+# "BASIC" (The current observation space only)
+# "FULL" (Full environment view with actions taken and reward feedback)
+hard_coded_agent_view: FULL
+
+# Sets How the Action Space is defined:
+# "NODE"
+# "ACL"
+# "ANY" node and acl actions
+action_type: NODE
+# observation space
+observation_space:
+  # flatten: true
+  components:
+    - name: NODE_LINK_TABLE
+    # - name: NODE_STATUSES
+    # - name: LINK_TRAFFIC_LEVELS
+
+
+# Number of episodes for training to run per session
+num_train_episodes: 30
+
+# Number of time_steps for training per episode
+num_train_steps: 1
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 10
+
+# Sets how often the agent will save a checkpoint (every n time episodes).
+# Set to 0 if no checkpoints are required. Default is 10
+checkpoint_every_n_episodes: 10
+
+# Time delay (milliseconds) between steps for CUSTOM agents.
+time_delay: 5
+
+# Type of session to be run. Options are:
+# "TRAIN" (Trains an agent)
+# "EVAL" (Evaluates an agent)
+# "TRAIN_EVAL" (Trains then evaluates an agent)
+session_type: TRAIN
+
+# Environment config values
+# The high value for the observation space
+observation_space_high_value: 1000000000
+
+# The Stable Baselines3 learn/eval output verbosity level:
+# Options are:
+# "NONE" (No Output)
+# "INFO" (Info Messages (such as devices and wrappers used))
+# "DEBUG" (All Messages)
+sb3_output_verbose_level: NONE
+
+# Reward values
+# Generic
+all_ok: 0
+# Node Hardware State
+off_should_be_on: -10
+off_should_be_resetting: -5
+on_should_be_off: -2
+on_should_be_resetting: -5
+resetting_should_be_on: -5
+resetting_should_be_off: -2
+resetting: -3
+# Node Software or Service State
+good_should_be_patching: 2
+good_should_be_compromised: 5
+good_should_be_overwhelmed: 5
+patching_should_be_good: -5
+patching_should_be_compromised: 2
+patching_should_be_overwhelmed: 2
+patching: -3
+compromised_should_be_good: -20
+compromised_should_be_patching: -20
+compromised_should_be_overwhelmed: -20
+compromised: -20
+overwhelmed_should_be_good: -20
+overwhelmed_should_be_patching: -20
+overwhelmed_should_be_compromised: -20
+overwhelmed: -20
+# Node File System State
+good_should_be_repairing: 2
+good_should_be_restoring: 2
+good_should_be_corrupt: 5
+good_should_be_destroyed: 10
+repairing_should_be_good: -5
+repairing_should_be_restoring: 2
+repairing_should_be_corrupt: 2
+repairing_should_be_destroyed: 0
+repairing: -3
+restoring_should_be_good: -10
+restoring_should_be_repairing: -2
+restoring_should_be_corrupt: 1
+restoring_should_be_destroyed: 2
+restoring: -6
+corrupt_should_be_good: -10
+corrupt_should_be_repairing: -10
+corrupt_should_be_restoring: -10
+corrupt_should_be_destroyed: 2
+corrupt: -10
+destroyed_should_be_good: -20
+destroyed_should_be_repairing: -20
+destroyed_should_be_restoring: -20
+destroyed_should_be_corrupt: -20
+destroyed: -20
+scanning: -2
+# IER status
+red_ier_running: -5
+green_ier_blocked: -10
+
+# Patching / Reset durations
+os_patching_duration: 5            # The time taken to patch the OS
+node_reset_duration: 5             # The time taken to reset a node (hardware)
+service_patching_duration: 5       # The time taken to patch a service
+file_system_repairing_limit: 5      # The time taken to repair the file system
+file_system_restoring_limit: 5      # The time taken to restore the file system
+file_system_scanning_limit: 5       # The time taken to scan the file system
diff --git a/tests/config/training_config_main.yaml b/tests/config/training_config_main.yaml
new file mode 100644
index 00000000..3351d66b
--- /dev/null
+++ b/tests/config/training_config_main.yaml
@@ -0,0 +1,153 @@
+# Training Config File
+
+# Sets which agent algorithm framework will be used.
+# Options are:
+# "SB3" (Stable Baselines3)
+# "RLLIB" (Ray RLlib)
+# "CUSTOM" (Custom Agent)
+agent_framework: SB3
+
+# Sets which deep learning framework will be used (by RLlib ONLY).
+# Default is TF (Tensorflow).
+# Options are:
+# "TF" (Tensorflow)
+# TF2 (Tensorflow 2.X)
+# TORCH (PyTorch)
+deep_learning_framework: TF2
+
+# Sets which Agent class will be used.
+# Options are:
+# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
+# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
+# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
+# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
+# "RANDOM" (primaite.agents.simple.RandomAgent)
+# "DUMMY" (primaite.agents.simple.DummyAgent)
+agent_identifier: PPO
+
+# Sets whether Red Agent POL and IER is randomised.
+# Options are:
+# True
+# False
+random_red_agent: False
+
+# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
+# Options are:
+# "BASIC" (The current observation space only)
+# "FULL" (Full environment view with actions taken and reward feedback)
+hard_coded_agent_view: FULL
+
+# Sets How the Action Space is defined:
+# "NODE"
+# "ACL"
+# "ANY" node and acl actions
+action_type: NODE
+# observation space
+observation_space:
+  # flatten: true
+  components:
+    - name: NODE_LINK_TABLE
+    # - name: NODE_STATUSES
+    # - name: LINK_TRAFFIC_LEVELS
+
+
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
+
+# Sets how often the agent will save a checkpoint (every n time episodes).
+# Set to 0 if no checkpoints are required. Default is 10
+checkpoint_every_n_episodes: 10
+
+# Time delay (milliseconds) between steps for CUSTOM agents.
+time_delay: 5
+
+# Type of session to be run. Options are:
+# "TRAIN" (Trains an agent)
+# "EVAL" (Evaluates an agent)
+# "TRAIN_EVAL" (Trains then evaluates an agent)
+session_type: TRAIN
+
+# Environment config values
+# The high value for the observation space
+observation_space_high_value: 1000000000
+
+# The Stable Baselines3 learn/eval output verbosity level:
+# Options are:
+# "NONE" (No Output)
+# "INFO" (Info Messages (such as devices and wrappers used))
+# "DEBUG" (All Messages)
+sb3_output_verbose_level: NONE
+
+# Reward values
+# Generic
+all_ok: 0
+# Node Hardware State
+off_should_be_on: -10
+off_should_be_resetting: -5
+on_should_be_off: -2
+on_should_be_resetting: -5
+resetting_should_be_on: -5
+resetting_should_be_off: -2
+resetting: -3
+# Node Software or Service State
+good_should_be_patching: 2
+good_should_be_compromised: 5
+good_should_be_overwhelmed: 5
+patching_should_be_good: -5
+patching_should_be_compromised: 2
+patching_should_be_overwhelmed: 2
+patching: -3
+compromised_should_be_good: -20
+compromised_should_be_patching: -20
+compromised_should_be_overwhelmed: -20
+compromised: -20
+overwhelmed_should_be_good: -20
+overwhelmed_should_be_patching: -20
+overwhelmed_should_be_compromised: -20
+overwhelmed: -20
+# Node File System State
+good_should_be_repairing: 2
+good_should_be_restoring: 2
+good_should_be_corrupt: 5
+good_should_be_destroyed: 10
+repairing_should_be_good: -5
+repairing_should_be_restoring: 2
+repairing_should_be_corrupt: 2
+repairing_should_be_destroyed: 0
+repairing: -3
+restoring_should_be_good: -10
+restoring_should_be_repairing: -2
+restoring_should_be_corrupt: 1
+restoring_should_be_destroyed: 2
+restoring: -6
+corrupt_should_be_good: -10
+corrupt_should_be_repairing: -10
+corrupt_should_be_restoring: -10
+corrupt_should_be_destroyed: 2
+corrupt: -10
+destroyed_should_be_good: -20
+destroyed_should_be_repairing: -20
+destroyed_should_be_restoring: -20
+destroyed_should_be_corrupt: -20
+destroyed: -20
+scanning: -2
+# IER status
+red_ier_running: -5
+green_ier_blocked: -10
+
+# Patching / Reset durations
+os_patching_duration: 5            # The time taken to patch the OS
+node_reset_duration: 5             # The time taken to reset a node (hardware)
+service_patching_duration: 5       # The time taken to patch a service
+file_system_repairing_limit: 5      # The time taken to repair the file system
+file_system_restoring_limit: 5      # The time taken to restore the file system
+file_system_scanning_limit: 5       # The time taken to scan the file system
diff --git a/tests/conftest.py b/tests/conftest.py
index af76b314..7e06bea7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -163,8 +163,9 @@ def run_generic(env, config_values):
     """Run against a generic agent."""
     # Reset the environment at the start of the episode
     # env.reset()
-    for episode in range(0, config_values.num_episodes):
-        for step in range(0, config_values.num_steps):
+    print(config_values.num_train_episodes, "how many episodes")
+    for episode in range(0, config_values.num_train_episodes):
+        for step in range(0, config_values.num_train_steps):
             # Send the observation space to the agent to get an action
             # TEMP - random action for now
             # action = env.blue_agent_action(obs)
diff --git a/tests/test_single_action_space.py b/tests/test_single_action_space.py
index 5d55b9c9..bfcffd42 100644
--- a/tests/test_single_action_space.py
+++ b/tests/test_single_action_space.py
@@ -12,8 +12,8 @@ def run_generic_set_actions(env: Primaite):
     # Reset the environment at the start of the episode
     # env.reset()
     training_config = env.training_config
-    for episode in range(0, training_config.num_episodes):
-        for step in range(0, training_config.num_steps):
+    for episode in range(0, training_config.num_train_episodes):
+        for step in range(0, training_config.num_train_steps):
             # Send the observation space to the agent to get an action
             # TEMP - random action for now
             # action = env.blue_agent_action(obs)
diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py
new file mode 100644
index 00000000..fad30f1b
--- /dev/null
+++ b/tests/test_train_eval_episode_steps.py
@@ -0,0 +1,46 @@
+import pytest
+
+from primaite import getLogger
+from primaite.config.lay_down_config import dos_very_basic_config_path
+from tests import TEST_CONFIG_ROOT
+from tests.conftest import run_generic
+
+_LOGGER = getLogger(__name__)
+
+
+@pytest.mark.parametrize(
+    "temp_primaite_session",
+    [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]],
+    indirect=True,
+)
+def test_eval_steps_differ_from_training(temp_primaite_session):
+    """Uses PrimaiteSession class to compare number of episodes used for training and evaluation."""
+    with temp_primaite_session as train_session:
+        env = train_session.env
+        train_session.learn()
+
+    """
+    Train_episode_step.yaml main config:
+    num_train_steps = 1
+    num_eval_steps = 10
+
+    When the YAML file changes from TRAIN to EVAL the episode step changes and uses the other config value.
+
+    The test is showing that 10 steps are running for evaluation and NOT 1 step as EVAL has been selected in the config.
+    """
+    assert env.episode_steps == 10  # 30
+    # assert env.actual_episode_count == 10 # should be 10
+
+
+@pytest.mark.parametrize(
+    "temp_primaite_session",
+    [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]],
+    indirect=True,
+)
+def test_train_eval_config_option(temp_primaite_session):
+    """Uses PrimaiteSession class to test number of episodes and steps used for TRAIN and EVAL option."""
+    with temp_primaite_session as train_session:
+        env = train_session.env
+        run_generic(env, env.training_config)
+
+    print(env.actual_episode_count, env.step_count, env.total_step_count)

From 40381833d3bae56ae95be5550e0d6c44992017bc Mon Sep 17 00:00:00 2001
From: Chris McCarthy <chris.mccarthy@methods.co.uk>
Date: Fri, 7 Jul 2023 15:50:14 +0100
Subject: [PATCH 3/9] #1566 - Refactored the test_train_eval_episode_steps.py
 to use TempPrimaiteSession. - Fixed all errors that were caused by fixing the
 above. - Some tests still fail, these are for SS to fix. - Dropped the old
 run_generic stuff from conftest.py

---
 src/primaite/agents/agent.py                  |  14 +-
 src/primaite/agents/rllib.py                  |   9 +-
 src/primaite/agents/sb3.py                    |   6 +-
 .../training/training_config_main.yaml        |   2 +-
 src/primaite/config/training_config.py        |   6 +-
 src/primaite/environment/primaite_env.py      |  10 +-
 src/primaite/environment/reward.py            |   1 -
 src/primaite/utils/session_output_reader.py   |   5 +-
 ...ne_node_states_on_off_lay_down_config.yaml |   5 -
 .../one_node_states_on_off_main_config.yaml   |   8 +-
 tests/config/train_episode_step.yaml          |  12 +-
 .../config/train_eval_check_episode_step.yaml | 153 ------------------
 tests/conftest.py                             |  67 +-------
 tests/test_reward.py                          |   4 +-
 tests/test_train_eval_episode_steps.py        |  46 +++---
 15 files changed, 69 insertions(+), 279 deletions(-)
 delete mode 100644 tests/config/train_eval_check_episode_step.yaml

diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py
index 2cdb242b..883e844b 100644
--- a/src/primaite/agents/agent.py
+++ b/src/primaite/agents/agent.py
@@ -153,12 +153,11 @@ class AgentSessionABC(ABC):
             metadata_dict = json.load(file)
 
         metadata_dict["end_datetime"] = datetime.now().isoformat()
-
         if not self.is_eval:
-            metadata_dict["learning"]["total_episodes"] = self._env.episode_count  # noqa
+            metadata_dict["learning"]["total_episodes"] = self._env.actual_episode_count  # noqa
             metadata_dict["learning"]["total_time_steps"] = self._env.total_step_count  # noqa
         else:
-            metadata_dict["evaluation"]["total_episodes"] = self._env.episode_count  # noqa
+            metadata_dict["evaluation"]["total_episodes"] = self._env.actual_episode_count  # noqa
             metadata_dict["evaluation"]["total_time_steps"] = self._env.total_step_count  # noqa
 
         filepath = self.session_path / "session_metadata.json"
@@ -209,10 +208,11 @@ class AgentSessionABC(ABC):
 
         :param kwargs: Any agent-specific key-word args to be passed.
         """
-        self._env.set_as_eval()  # noqa
-        self.is_eval = True
-        self._plot_av_reward_per_episode(learning_session=False)
-        _LOGGER.info("Finished evaluation")
+        if self._can_evaluate:
+            self._plot_av_reward_per_episode(learning_session=False)
+            self._update_session_metadata_file()
+            self.is_eval = True
+            _LOGGER.info("Finished evaluation")
 
     @abstractmethod
     def _get_latest_checkpoint(self):
diff --git a/src/primaite/agents/rllib.py b/src/primaite/agents/rllib.py
index 28d21e20..7067f6a6 100644
--- a/src/primaite/agents/rllib.py
+++ b/src/primaite/agents/rllib.py
@@ -85,8 +85,12 @@ class RLlibAgent(AgentSessionABC):
             metadata_dict = json.load(file)
 
         metadata_dict["end_datetime"] = datetime.now().isoformat()
-        metadata_dict["total_episodes"] = self._current_result["episodes_total"]
-        metadata_dict["total_time_steps"] = self._current_result["timesteps_total"]
+        if not self.is_eval:
+            metadata_dict["learning"]["total_episodes"] = self._current_result["episodes_total"]  # noqa
+            metadata_dict["learning"]["total_time_steps"] = self._current_result["timesteps_total"]  # noqa
+        else:
+            metadata_dict["evaluation"]["total_episodes"] = self._current_result["episodes_total"]  # noqa
+            metadata_dict["evaluation"]["total_time_steps"] = self._current_result["timesteps_total"]  # noqa
 
         filepath = self.session_path / "session_metadata.json"
         _LOGGER.debug(f"Updating Session Metadata file: {filepath}")
@@ -150,7 +154,6 @@ class RLlibAgent(AgentSessionABC):
 
         super().learn()
 
-
     def evaluate(
         self,
         **kwargs,
diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py
index 00983140..dc049e91 100644
--- a/src/primaite/agents/sb3.py
+++ b/src/primaite/agents/sb3.py
@@ -58,7 +58,7 @@ class SB3Agent(AgentSessionABC):
             PPOMlp,
             self._env,
             verbose=self.sb3_output_verbose_level,
-            n_steps=self._training_config.num_eval_steps,
+            n_steps=self._training_config.num_train_steps,
             tensorboard_log=str(self._tensorboard_log_path),
             seed=self._training_config.seed,
         )
@@ -93,7 +93,7 @@ class SB3Agent(AgentSessionABC):
         for i in range(episodes):
             self._agent.learn(total_timesteps=time_steps)
             self._save_checkpoint()
-        self._env.reset()
+        self._env._write_av_reward_per_episode()  # noqa
         self.save()
         self._env.close()
         super().learn()
@@ -129,7 +129,7 @@ class SB3Agent(AgentSessionABC):
                 if isinstance(action, np.ndarray):
                     action = np.int64(action)
                 obs, rewards, done, info = self._env.step(action)
-        self._env.reset()
+        self._env._write_av_reward_per_episode()  # noqa
         self._env.close()
         super().evaluate()
 
diff --git a/src/primaite/config/_package_data/training/training_config_main.yaml b/src/primaite/config/_package_data/training/training_config_main.yaml
index f45f976a..61c45758 100644
--- a/src/primaite/config/_package_data/training/training_config_main.yaml
+++ b/src/primaite/config/_package_data/training/training_config_main.yaml
@@ -68,7 +68,7 @@ num_train_episodes: 10
 num_train_steps: 256
 
 # Number of episodes for evaluation to run per session
-num_eval_episodes: 10
+num_eval_episodes: 1
 
 # Number of time_steps for evaluation per episode
 num_eval_steps: 256
diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py
index 2b46e513..5bbe881b 100644
--- a/src/primaite/config/training_config.py
+++ b/src/primaite/config/training_config.py
@@ -66,7 +66,7 @@ class TrainingConfig:
     num_train_steps: int = 256
     "The number of steps in an episode during an training session"
 
-    num_eval_episodes: int = 10
+    num_eval_episodes: int = 1
     "The number of episodes to train over during an evaluation session"
 
     num_eval_steps: int = 256
@@ -242,10 +242,10 @@ class TrainingConfig:
             tc += f"{self.hard_coded_agent_view}, "
         tc += f"{self.action_type}, "
         tc += f"observation_space={self.observation_space}, "
-        if self.session_type.name == "TRAIN":
+        if self.session_type is SessionType.TRAIN:
             tc += f"{self.num_train_episodes} episodes @ "
             tc += f"{self.num_train_steps} steps"
-        elif self.session_type.name == "EVAL":
+        elif self.session_type is SessionType.EVAL:
             tc += f"{self.num_eval_episodes} episodes @ "
             tc += f"{self.num_eval_steps} steps"
         else:
diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py
index 18cf8767..ed6eefb2 100644
--- a/src/primaite/environment/primaite_env.py
+++ b/src/primaite/environment/primaite_env.py
@@ -261,6 +261,11 @@ class Primaite(Env):
         self.total_step_count = 0
         self.episode_steps = self.training_config.num_eval_steps
 
+    def _write_av_reward_per_episode(self):
+        if self.actual_episode_count > 0:
+            csv_data = self.actual_episode_count, self.average_reward
+            self.episode_av_reward_writer.write(csv_data)
+
     def reset(self):
         """
         AI Gym Reset function.
@@ -268,10 +273,7 @@ class Primaite(Env):
         Returns:
              Environment observation space (reset)
         """
-        if self.actual_episode_count > 0:
-            csv_data = self.actual_episode_count, self.average_reward
-            self.episode_av_reward_writer.write(csv_data)
-
+        self._write_av_reward_per_episode()
         self.episode_count += 1
 
         # Don't need to reset links, as they are cleared and recalculated every
diff --git a/src/primaite/environment/reward.py b/src/primaite/environment/reward.py
index e4353cb9..9cbb0078 100644
--- a/src/primaite/environment/reward.py
+++ b/src/primaite/environment/reward.py
@@ -90,7 +90,6 @@ def calculate_reward_function(
                         f"Penalty of {ier_reward} was NOT applied."
                     )
                 )
-
     return reward_value
 
 
diff --git a/src/primaite/utils/session_output_reader.py b/src/primaite/utils/session_output_reader.py
index d04f375e..eb7a7675 100644
--- a/src/primaite/utils/session_output_reader.py
+++ b/src/primaite/utils/session_output_reader.py
@@ -16,5 +16,6 @@ def av_rewards_dict(av_rewards_csv_file: Union[str, Path]) -> Dict[int, float]:
     :param av_rewards_csv_file: The average rewards per episode csv file path.
     :return: The average rewards per episode cdv as a dict.
     """
-    d = pl.read_csv(av_rewards_csv_file).to_dict()
-    return {v: d["Average Reward"][i] for i, v in enumerate(d["Episode"])}
+    df = pl.read_csv(av_rewards_csv_file).to_dict()
+
+    return {v: df["Average Reward"][i] for i, v in enumerate(df["Episode"])}
diff --git a/tests/config/one_node_states_on_off_lay_down_config.yaml b/tests/config/one_node_states_on_off_lay_down_config.yaml
index 996cf368..aadbd449 100644
--- a/tests/config/one_node_states_on_off_lay_down_config.yaml
+++ b/tests/config/one_node_states_on_off_lay_down_config.yaml
@@ -18,11 +18,6 @@
   - name: ftp
     port: '21'
     state: GOOD
-- item_type: POSITION
-  positions:
-  - node: '1'
-    x_pos: 309
-    y_pos: 78
 - item_type: RED_POL
   id: '1'
   start_step: 1
diff --git a/tests/config/one_node_states_on_off_main_config.yaml b/tests/config/one_node_states_on_off_main_config.yaml
index 63fdd1a5..dd425a8c 100644
--- a/tests/config/one_node_states_on_off_main_config.yaml
+++ b/tests/config/one_node_states_on_off_main_config.yaml
@@ -22,17 +22,13 @@ agent_identifier: DUMMY
 # "ACL"
 # "ANY" node and acl actions
 action_type: NODE
-# Number of episodes for training to run per session
-num_train_episodes: 10
 
-# Number of time_steps for training per episode
-num_train_steps: 256
 
 # Number of episodes for evaluation to run per session
-num_eval_episodes: 10
+num_eval_episodes: 1
 
 # Number of time_steps for evaluation per episode
-num_eval_steps: 256
+num_eval_steps: 15
 # Time delay between steps (for generic agents)
 time_delay: 1
 
diff --git a/tests/config/train_episode_step.yaml b/tests/config/train_episode_step.yaml
index 550b95fd..f112b741 100644
--- a/tests/config/train_episode_step.yaml
+++ b/tests/config/train_episode_step.yaml
@@ -52,20 +52,20 @@ observation_space:
 
 
 # Number of episodes for training to run per session
-num_train_episodes: 30
+num_train_episodes: 3
 
 # Number of time_steps for training per episode
-num_train_steps: 1
+num_train_steps: 25
 
 # Number of episodes for evaluation to run per session
-num_eval_episodes: 10
+num_eval_episodes: 1
 
 # Number of time_steps for evaluation per episode
-num_eval_steps: 10
+num_eval_steps: 17
 
 # Sets how often the agent will save a checkpoint (every n time episodes).
 # Set to 0 if no checkpoints are required. Default is 10
-checkpoint_every_n_episodes: 10
+checkpoint_every_n_episodes: 0
 
 # Time delay (milliseconds) between steps for CUSTOM agents.
 time_delay: 5
@@ -74,7 +74,7 @@ time_delay: 5
 # "TRAIN" (Trains an agent)
 # "EVAL" (Evaluates an agent)
 # "TRAIN_EVAL" (Trains then evaluates an agent)
-session_type: EVAL
+session_type: TRAIN_EVAL
 
 # Environment config values
 # The high value for the observation space
diff --git a/tests/config/train_eval_check_episode_step.yaml b/tests/config/train_eval_check_episode_step.yaml
deleted file mode 100644
index f616116e..00000000
--- a/tests/config/train_eval_check_episode_step.yaml
+++ /dev/null
@@ -1,153 +0,0 @@
-# Training Config File
-
-# Sets which agent algorithm framework will be used.
-# Options are:
-# "SB3" (Stable Baselines3)
-# "RLLIB" (Ray RLlib)
-# "CUSTOM" (Custom Agent)
-agent_framework: SB3
-
-# Sets which deep learning framework will be used (by RLlib ONLY).
-# Default is TF (Tensorflow).
-# Options are:
-# "TF" (Tensorflow)
-# TF2 (Tensorflow 2.X)
-# TORCH (PyTorch)
-deep_learning_framework: TF2
-
-# Sets which Agent class will be used.
-# Options are:
-# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
-# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
-# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
-# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
-# "RANDOM" (primaite.agents.simple.RandomAgent)
-# "DUMMY" (primaite.agents.simple.DummyAgent)
-agent_identifier: PPO
-
-# Sets whether Red Agent POL and IER is randomised.
-# Options are:
-# True
-# False
-random_red_agent: False
-
-# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
-# Options are:
-# "BASIC" (The current observation space only)
-# "FULL" (Full environment view with actions taken and reward feedback)
-hard_coded_agent_view: FULL
-
-# Sets How the Action Space is defined:
-# "NODE"
-# "ACL"
-# "ANY" node and acl actions
-action_type: NODE
-# observation space
-observation_space:
-  # flatten: true
-  components:
-    - name: NODE_LINK_TABLE
-    # - name: NODE_STATUSES
-    # - name: LINK_TRAFFIC_LEVELS
-
-
-# Number of episodes for training to run per session
-num_train_episodes: 30
-
-# Number of time_steps for training per episode
-num_train_steps: 1
-
-# Number of episodes for evaluation to run per session
-num_eval_episodes: 10
-
-# Number of time_steps for evaluation per episode
-num_eval_steps: 10
-
-# Sets how often the agent will save a checkpoint (every n time episodes).
-# Set to 0 if no checkpoints are required. Default is 10
-checkpoint_every_n_episodes: 10
-
-# Time delay (milliseconds) between steps for CUSTOM agents.
-time_delay: 5
-
-# Type of session to be run. Options are:
-# "TRAIN" (Trains an agent)
-# "EVAL" (Evaluates an agent)
-# "TRAIN_EVAL" (Trains then evaluates an agent)
-session_type: TRAIN
-
-# Environment config values
-# The high value for the observation space
-observation_space_high_value: 1000000000
-
-# The Stable Baselines3 learn/eval output verbosity level:
-# Options are:
-# "NONE" (No Output)
-# "INFO" (Info Messages (such as devices and wrappers used))
-# "DEBUG" (All Messages)
-sb3_output_verbose_level: NONE
-
-# Reward values
-# Generic
-all_ok: 0
-# Node Hardware State
-off_should_be_on: -10
-off_should_be_resetting: -5
-on_should_be_off: -2
-on_should_be_resetting: -5
-resetting_should_be_on: -5
-resetting_should_be_off: -2
-resetting: -3
-# Node Software or Service State
-good_should_be_patching: 2
-good_should_be_compromised: 5
-good_should_be_overwhelmed: 5
-patching_should_be_good: -5
-patching_should_be_compromised: 2
-patching_should_be_overwhelmed: 2
-patching: -3
-compromised_should_be_good: -20
-compromised_should_be_patching: -20
-compromised_should_be_overwhelmed: -20
-compromised: -20
-overwhelmed_should_be_good: -20
-overwhelmed_should_be_patching: -20
-overwhelmed_should_be_compromised: -20
-overwhelmed: -20
-# Node File System State
-good_should_be_repairing: 2
-good_should_be_restoring: 2
-good_should_be_corrupt: 5
-good_should_be_destroyed: 10
-repairing_should_be_good: -5
-repairing_should_be_restoring: 2
-repairing_should_be_corrupt: 2
-repairing_should_be_destroyed: 0
-repairing: -3
-restoring_should_be_good: -10
-restoring_should_be_repairing: -2
-restoring_should_be_corrupt: 1
-restoring_should_be_destroyed: 2
-restoring: -6
-corrupt_should_be_good: -10
-corrupt_should_be_repairing: -10
-corrupt_should_be_restoring: -10
-corrupt_should_be_destroyed: 2
-corrupt: -10
-destroyed_should_be_good: -20
-destroyed_should_be_repairing: -20
-destroyed_should_be_restoring: -20
-destroyed_should_be_corrupt: -20
-destroyed: -20
-scanning: -2
-# IER status
-red_ier_running: -5
-green_ier_blocked: -10
-
-# Patching / Reset durations
-os_patching_duration: 5            # The time taken to patch the OS
-node_reset_duration: 5             # The time taken to reset a node (hardware)
-service_patching_duration: 5       # The time taken to patch a service
-file_system_repairing_limit: 5      # The time take to repair the file system
-file_system_restoring_limit: 5      # The time take to restore the file system
-file_system_scanning_limit: 5       # The time taken to scan the file system
diff --git a/tests/conftest.py b/tests/conftest.py
index 2d78f61d..aaf4dbce 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,17 +1,16 @@
 # Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
 import datetime
+import json
 import shutil
 import tempfile
-import time
 from datetime import datetime
 from pathlib import Path
-from typing import Dict, Union
+from typing import Any, Dict, Union
 from unittest.mock import patch
 
 import pytest
 
 from primaite import getLogger
-from primaite.common.enums import AgentIdentifier
 from primaite.environment.primaite_env import Primaite
 from primaite.primaite_session import PrimaiteSession
 from primaite.utils.session_output_reader import av_rewards_dict
@@ -48,6 +47,11 @@ class TempPrimaiteSession(PrimaiteSession):
         csv_file = f"average_reward_per_episode_{self.timestamp_str}.csv"
         return av_rewards_dict(self.evaluation_path / csv_file)
 
+    def metadata_file_as_dict(self) -> Dict[str, Any]:
+        """Read the session_metadata.json file and return as a dict."""
+        with open(self.session_path / "session_metadata.json", "r") as file:
+            return json.load(file)
+
     @property
     def env(self) -> Primaite:
         """Direct access to the env for ease of testing."""
@@ -58,6 +62,7 @@ class TempPrimaiteSession(PrimaiteSession):
 
     def __exit__(self, type, value, tb):
         shutil.rmtree(self.session_path)
+        shutil.rmtree(self.session_path.parent)
         _LOGGER.debug(f"Deleted temp session directory: {self.session_path}")
 
 
@@ -129,59 +134,3 @@ def temp_session_path() -> Path:
     session_path.mkdir(exist_ok=True, parents=True)
 
     return session_path
-
-
-def _get_primaite_env_from_config(
-    training_config_path: Union[str, Path],
-    lay_down_config_path: Union[str, Path],
-    temp_session_path,
-):
-    """Takes a config path and returns the created instance of Primaite."""
-    session_timestamp: datetime = datetime.now()
-    session_path = temp_session_path(session_timestamp)
-
-    timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
-    env = Primaite(
-        training_config_path=training_config_path,
-        lay_down_config_path=lay_down_config_path,
-        session_path=session_path,
-        timestamp_str=timestamp_str,
-    )
-    config_values = env.training_config
-    config_values.num_steps = env.episode_steps
-
-    # TOOD: This needs t be refactored to happen outside. Should be part of
-    # a main Session class.
-    if env.training_config.agent_identifier is AgentIdentifier.RANDOM:
-        run_generic(env, config_values)
-
-    return env
-
-
-def run_generic(env, config_values):
-    """Run against a generic agent."""
-    # Reset the environment at the start of the episode
-    # env.reset()
-    print(config_values.num_train_episodes, "how many episodes")
-    for episode in range(0, config_values.num_train_episodes):
-        for step in range(0, config_values.num_train_steps):
-            # Send the observation space to the agent to get an action
-            # TEMP - random action for now
-            # action = env.blue_agent_action(obs)
-            # action = env.action_space.sample()
-            action = 0
-
-            # Run the simulation step on the live environment
-            obs, reward, done, info = env.step(action)
-
-            # Break if done is True
-            if done:
-                break
-
-            # Introduce a delay between steps
-            time.sleep(config_values.time_delay / 1000)
-
-        # Reset the environment at the end of the episode
-        # env.reset()
-
-    # env.close()
diff --git a/tests/test_reward.py b/tests/test_reward.py
index 81437860..d1b56671 100644
--- a/tests/test_reward.py
+++ b/tests/test_reward.py
@@ -1,7 +1,10 @@
 import pytest
 
+from primaite import getLogger
 from tests import TEST_CONFIG_ROOT
 
+_LOGGER = getLogger(__name__)
+
 
 @pytest.mark.parametrize(
     "temp_primaite_session",
@@ -44,7 +47,6 @@ def test_rewards_are_being_penalised_at_each_step_function(
         Average Reward: -8 (-120 / 15)
     """
     with temp_primaite_session as session:
-        session.evaluate()
         session.close()
         ev_rewards = session.eval_av_reward_per_episode_csv()
         assert ev_rewards[1] == -8.0
diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py
index fad30f1b..daa93055 100644
--- a/tests/test_train_eval_episode_steps.py
+++ b/tests/test_train_eval_episode_steps.py
@@ -3,7 +3,6 @@ import pytest
 from primaite import getLogger
 from primaite.config.lay_down_config import dos_very_basic_config_path
 from tests import TEST_CONFIG_ROOT
-from tests.conftest import run_generic
 
 _LOGGER = getLogger(__name__)
 
@@ -14,33 +13,30 @@ _LOGGER = getLogger(__name__)
     indirect=True,
 )
 def test_eval_steps_differ_from_training(temp_primaite_session):
-    """Uses PrimaiteSession class to compare number of episodes used for training and evaluation."""
-    with temp_primaite_session as train_session:
-        env = train_session.env
-        train_session.learn()
+    """Uses PrimaiteSession class to compare number of episodes used for training and evaluation.
 
-    """
     Train_episode_step.yaml main config:
-    num_train_steps = 1
-    num_eval_steps = 10
-
-    When the YAML file changes from TRAIN to EVAL the episode step changes and uses the other config value.
-
-    The test is showing that 10 steps are running for evaluation and NOT 1 step as EVAL has been selected in the config.
+        num_train_steps = 25
+        num_train_episodes = 3
+        num_eval_steps = 17
+        num_eval_episodes = 1
     """
-    assert env.episode_steps == 10  # 30
-    # assert env.actual_episode_count == 10 # should be 10
+    expected_learning_metadata = {"total_episodes": 3, "total_time_steps": 75}
 
+    expected_evaluation_metadata = {"total_episodes": 1, "total_time_steps": 17}
+    with temp_primaite_session as session:
+        # Run learning and check episode and step counts
+        session.learn()
+        assert session.env.actual_episode_count == expected_learning_metadata["total_episodes"]
+        assert session.env.total_step_count == expected_learning_metadata["total_time_steps"]
 
-@pytest.mark.parametrize(
-    "temp_primaite_session",
-    [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]],
-    indirect=True,
-)
-def test_train_eval_config_option(temp_primaite_session):
-    """Uses PrimaiteSession class to test number of episodes and steps used for TRAIN and EVAL option."""
-    with temp_primaite_session as train_session:
-        env = train_session.env
-        run_generic(env, env.training_config)
+        # Run evaluation and check episode and step counts
+        session.evaluate()
+        assert session.env.actual_episode_count == expected_evaluation_metadata["total_episodes"]
+        assert session.env.total_step_count == expected_evaluation_metadata["total_time_steps"]
 
-    print(env.actual_episode_count, env.step_count, env.total_step_count)
+        # Load the session_metadata.json file and check that the both the
+        # learning and evaluation match what is expected above
+        metadata = session.metadata_file_as_dict()
+        assert metadata["learning"] == expected_learning_metadata
+        assert metadata["evaluation"] == expected_evaluation_metadata

From 41fab6562e8be6312256b597b7519815ae57938c Mon Sep 17 00:00:00 2001
From: SunilSamra <sunil-samra@nsc.co.uk>
Date: Fri, 7 Jul 2023 16:26:12 +0100
Subject: [PATCH 4/9] 1566 - updated configs to correct values of step count
 and number of episodes

---
 .../obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml    | 11 ++---------
 .../config/obs_tests/main_config_NODE_LINK_TABLE.yaml | 10 ++--------
 tests/config/obs_tests/main_config_NODE_STATUSES.yaml |  9 ++-------
 tests/config/obs_tests/main_config_without_obs.yaml   | 10 ++--------
 4 files changed, 8 insertions(+), 32 deletions(-)

diff --git a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml
index bbdce9c1..2ac8f59a 100644
--- a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml
+++ b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml
@@ -23,17 +23,10 @@ agent_identifier: A2C
 # "ANY" node and acl actions
 action_type: ANY
 # Number of episodes for training to run per session
-num_train_episodes: 10
+num_train_episodes: 1
 
 # Number of time_steps for training per episode
-num_train_steps: 256
-
-# Number of episodes for evaluation to run per session
-num_eval_episodes: 10
-
-# Number of time_steps for evaluation per episode
-num_eval_steps: 256
-
+num_train_steps: 5
 
 observation_space:
   components:
diff --git a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml
index 41b3e588..a9986d5b 100644
--- a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml
+++ b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml
@@ -23,16 +23,10 @@ agent_identifier: RANDOM
 # "ANY" node and acl actions
 action_type: ANY
 # Number of episodes for training to run per session
-num_train_episodes: 10
+num_train_episodes: 1
 
 # Number of time_steps for training per episode
-num_train_steps: 256
-
-# Number of episodes for evaluation to run per session
-num_eval_episodes: 10
-
-# Number of time_steps for evaluation per episode
-num_eval_steps: 256
+num_train_steps: 5
 
 observation_space:
   components:
diff --git a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml
index 34758199..a129712c 100644
--- a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml
+++ b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml
@@ -23,16 +23,11 @@ agent_identifier: RANDOM
 # "ANY" node and acl actions
 action_type: ANY
 # Number of episodes for training to run per session
-num_train_episodes: 10
+num_train_episodes: 1
 
 # Number of time_steps for training per episode
-num_train_steps: 256
+num_train_steps: 5
 
-# Number of episodes for evaluation to run per session
-num_eval_episodes: 10
-
-# Number of time_steps for evaluation per episode
-num_eval_steps: 256
 
 observation_space:
   components:
diff --git a/tests/config/obs_tests/main_config_without_obs.yaml b/tests/config/obs_tests/main_config_without_obs.yaml
index 352e765c..03d11b82 100644
--- a/tests/config/obs_tests/main_config_without_obs.yaml
+++ b/tests/config/obs_tests/main_config_without_obs.yaml
@@ -23,16 +23,10 @@ agent_identifier: RANDOM
 # "ANY" node and acl actions
 action_type: ANY
 # Number of episodes for training to run per session
-num_train_episodes: 10
+num_train_episodes: 1
 
 # Number of time_steps for training per episode
-num_train_steps: 256
-
-# Number of episodes for evaluation to run per session
-num_eval_episodes: 10
-
-# Number of time_steps for evaluation per episode
-num_eval_steps: 256
+num_train_steps: 5
 # Time delay between steps (for generic agents)
 time_delay: 1
 # Type of session to be run (TRAINING or EVALUATION)

From 921dc934c266ed9379e97702bd9019073aa15fd5 Mon Sep 17 00:00:00 2001
From: SunilSamra <sunil-samra@nsc.co.uk>
Date: Mon, 10 Jul 2023 11:25:26 +0100
Subject: [PATCH 5/9] 1566 - added correct num_train_episodes etc values to
 configs, fixed test_reward.py

---
 src/primaite/agents/agent.py                           |  1 +
 tests/config/ppo_not_seeded_training_config.yaml       | 10 ++++++++--
 tests/config/ppo_seeded_training_config.yaml           | 10 ++++++++--
 ...le_action_space_fixed_blue_actions_main_config.yaml |  9 ++-------
 tests/config/single_action_space_lay_down_config.yaml  |  8 --------
 tests/config/test_random_red_main_config.yaml          |  8 ++++----
 tests/test_reward.py                                   |  2 +-
 7 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py
index 883e844b..95a00f49 100644
--- a/src/primaite/agents/agent.py
+++ b/src/primaite/agents/agent.py
@@ -377,6 +377,7 @@ class HardCodedAgentSessionABC(AgentSessionABC):
                 time.sleep(self._training_config.time_delay / 1000)
             obs = self._env.reset()
         self._env.close()
+        super().evaluate()
 
     @classmethod
     def load(cls):
diff --git a/tests/config/ppo_not_seeded_training_config.yaml b/tests/config/ppo_not_seeded_training_config.yaml
index 23cff44e..14b3f087 100644
--- a/tests/config/ppo_not_seeded_training_config.yaml
+++ b/tests/config/ppo_not_seeded_training_config.yaml
@@ -60,10 +60,16 @@ observation_space:
     # - name: NODE_STATUSES
     # - name: LINK_TRAFFIC_LEVELS
 # Number of episodes to run per session
-num_episodes: 10
+num_train_episodes: 10
 
 # Number of time_steps per episode
-num_steps: 256
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 10
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 
 # Sets how often the agent will save a checkpoint (every n time episodes).
 # Set to 0 if no checkpoints are required. Default is 10
diff --git a/tests/config/ppo_seeded_training_config.yaml b/tests/config/ppo_seeded_training_config.yaml
index 181331d9..a176c793 100644
--- a/tests/config/ppo_seeded_training_config.yaml
+++ b/tests/config/ppo_seeded_training_config.yaml
@@ -60,10 +60,16 @@ observation_space:
     # - name: NODE_STATUSES
     # - name: LINK_TRAFFIC_LEVELS
 # Number of episodes to run per session
-num_episodes: 10
+num_train_episodes: 10
 
 # Number of time_steps per episode
-num_steps: 256
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 1
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256
 
 # Sets how often the agent will save a checkpoint (every n time episodes).
 # Set to 0 if no checkpoints are required. Default is 10
diff --git a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml
index 859b2ab3..0f378634 100644
--- a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml
+++ b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml
@@ -23,16 +23,11 @@ agent_identifier: RANDOM
 # "ANY" node and acl actions
 action_type: ANY
 # Number of episodes for training to run per session
-num_train_episodes: 10
+num_train_episodes: 1
 
 # Number of time_steps for training per episode
-num_train_steps: 256
+num_train_steps: 15
 
-# Number of episodes for evaluation to run per session
-num_eval_episodes: 10
-
-# Number of time_steps for evaluation per episode
-num_eval_steps: 256
 # Time delay between steps (for generic agents)
 time_delay: 1
 # Type of session to be run (TRAINING or EVALUATION)
diff --git a/tests/config/single_action_space_lay_down_config.yaml b/tests/config/single_action_space_lay_down_config.yaml
index c80c0bab..9d05b84a 100644
--- a/tests/config/single_action_space_lay_down_config.yaml
+++ b/tests/config/single_action_space_lay_down_config.yaml
@@ -32,14 +32,6 @@
   - name: ftp
     port: '21'
     state: COMPROMISED
-- item_type: POSITION
-  positions:
-  - node: '1'
-    x_pos: 309
-    y_pos: 78
-  - node: '2'
-    x_pos: 200
-    y_pos: 78
 - item_type: RED_IER
   id: '3'
   start_step: 2
diff --git a/tests/config/test_random_red_main_config.yaml b/tests/config/test_random_red_main_config.yaml
index e0fc40ee..e2b24b41 100644
--- a/tests/config/test_random_red_main_config.yaml
+++ b/tests/config/test_random_red_main_config.yaml
@@ -29,16 +29,16 @@ random_red_agent: True
 # "ANY" node and acl actions
 action_type: NODE
 # Number of episodes for training to run per session
-num_train_episodes: 10
+num_train_episodes: 2
 
 # Number of time_steps for training per episode
-num_train_steps: 256
+num_train_steps: 15
 
 # Number of episodes for evaluation to run per session
-num_eval_episodes: 10
+num_eval_episodes: 2
 
 # Number of time_steps for evaluation per episode
-num_eval_steps: 256
+num_eval_steps: 15
 # Time delay between steps (for generic agents)
 time_delay: 1
 
diff --git a/tests/test_reward.py b/tests/test_reward.py
index d1b56671..bb6eb1b0 100644
--- a/tests/test_reward.py
+++ b/tests/test_reward.py
@@ -47,6 +47,6 @@ def test_rewards_are_being_penalised_at_each_step_function(
         Average Reward: -8 (-120 / 15)
     """
     with temp_primaite_session as session:
-        session.close()
+        session.evaluate()
         ev_rewards = session.eval_av_reward_per_episode_csv()
         assert ev_rewards[1] == -8.0

From 563ff72fd646d66ae737977eafde381faf6e8f58 Mon Sep 17 00:00:00 2001
From: SunilSamra <sunil-samra@nsc.co.uk>
Date: Mon, 10 Jul 2023 13:24:34 +0100
Subject: [PATCH 6/9] 1566 - fixed the test_training_config.py test file by
 removing num_steps from init

---
 src/primaite/config/training_config.py                  | 8 ++++----
 tests/config/legacy_conversion/new_training_config.yaml | 5 -----
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py
index 5bbe881b..785d9757 100644
--- a/src/primaite/config/training_config.py
+++ b/src/primaite/config/training_config.py
@@ -300,7 +300,7 @@ def convert_legacy_training_config_dict(
     agent_framework: AgentFramework = AgentFramework.SB3,
     agent_identifier: AgentIdentifier = AgentIdentifier.PPO,
     action_type: ActionType = ActionType.ANY,
-    num_steps: int = 256,
+    num_train_steps: int = 256,
 ) -> Dict[str, Any]:
     """
     Convert a legacy training config dict to the new format.
@@ -312,15 +312,15 @@ def convert_legacy_training_config_dict(
         training configs don't have agent_identifier values.
     :param action_type: The action space type to set as legacy training configs
         don't have action_type values.
-    :param num_steps: The number of steps to set as legacy training configs
-        don't have num_steps values.
+    :param num_train_steps: The number of training steps per episode to set, as
+        legacy training configs don't have num_train_steps values.
     :return: The converted training config dict.
     """
     config_dict = {
         "agent_framework": agent_framework.name,
         "agent_identifier": agent_identifier.name,
         "action_type": action_type.name,
-        "num_steps": num_steps,
+        "num_train_steps": num_train_steps,
         "sb3_output_verbose_level": SB3OutputVerboseLevel.INFO.name,
     }
     session_type_map = {"TRAINING": "TRAIN", "EVALUATION": "EVAL"}
diff --git a/tests/config/legacy_conversion/new_training_config.yaml b/tests/config/legacy_conversion/new_training_config.yaml
index 5ca80742..c57741f7 100644
--- a/tests/config/legacy_conversion/new_training_config.yaml
+++ b/tests/config/legacy_conversion/new_training_config.yaml
@@ -26,11 +26,6 @@ num_train_episodes: 10
 # Number of time_steps for training per episode
 num_train_steps: 256
 
-# Number of episodes for evaluation to run per session
-num_eval_episodes: 10
-
-# Number of time_steps for evaluation per episode
-num_eval_steps: 256
 # Time delay between steps (for generic agents)
 time_delay: 10
 # Type of session to be run (TRAINING or EVALUATION)

From f3750032bea2a5c600559fa4910e3031c41fdfdd Mon Sep 17 00:00:00 2001
From: SunilSamra <sunil-samra@nsc.co.uk>
Date: Tue, 11 Jul 2023 12:37:14 +0100
Subject: [PATCH 7/9] 1566 - applied pre-commit

---
 tests/test_train_eval_episode_steps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py
index daa93055..b839e630 100644
--- a/tests/test_train_eval_episode_steps.py
+++ b/tests/test_train_eval_episode_steps.py
@@ -22,8 +22,8 @@ def test_eval_steps_differ_from_training(temp_primaite_session):
         num_eval_episodes = 1
     """
     expected_learning_metadata = {"total_episodes": 3, "total_time_steps": 75}
-
     expected_evaluation_metadata = {"total_episodes": 1, "total_time_steps": 17}
+
     with temp_primaite_session as session:
         # Run learning and check episode and step counts
         session.learn()

From 585d35338f96c7354af96f140674cfc6d5faf9ac Mon Sep 17 00:00:00 2001
From: SunilSamra <sunil-samra@nsc.co.uk>
Date: Tue, 11 Jul 2023 12:40:25 +0100
Subject: [PATCH 8/9] 1566 - updated docs for new items in training_config

---
 docs/source/config.rst | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/docs/source/config.rst b/docs/source/config.rst
index a28f0ec1..af590a24 100644
--- a/docs/source/config.rst
+++ b/docs/source/config.rst
@@ -83,13 +83,24 @@ The environment config file consists of the following attributes:
 
     The other configurable item is ``flatten`` which is false by default. When set to true, the observation space is flattened (turned into a 1-D vector). You should use this if your RL agent does not natively support observation space types like ``gym.Spaces.Tuple``.
 
-* **num_episodes** [int]
+* **num_train_episodes** [int]
 
-    This defines the number of episodes that the agent will train or be evaluated over.
+    This defines the number of episodes that the agent will train for.
 
-* **num_steps** [int]
 
-    Determines the number of steps to run in each episode of the session
+* **num_train_steps** [int]
+
+    Determines the number of steps to run in each episode of the training session.
+
+
+* **num_eval_episodes** [int]
+
+    This defines the number of episodes that the agent will be evaluated over.
+
+
+* **num_eval_steps** [int]
+
+    Determines the number of steps to run in each episode of the evaluation session.
 
 
 * **time_delay** [int]

From 96b48aad796b627034150f515a73bb27dd64d722 Mon Sep 17 00:00:00 2001
From: SunilSamra <sunil-samra@nsc.co.uk>
Date: Wed, 12 Jul 2023 09:52:54 +0100
Subject: [PATCH 9/9] 1566 - removed redundant config file

---
 tests/config/training_config_main.yaml | 153 -------------------------
 1 file changed, 153 deletions(-)
 delete mode 100644 tests/config/training_config_main.yaml

diff --git a/tests/config/training_config_main.yaml b/tests/config/training_config_main.yaml
deleted file mode 100644
index 3351d66b..00000000
--- a/tests/config/training_config_main.yaml
+++ /dev/null
@@ -1,153 +0,0 @@
-# Training Config File
-
-# Sets which agent algorithm framework will be used.
-# Options are:
-# "SB3" (Stable Baselines3)
-# "RLLIB" (Ray RLlib)
-# "CUSTOM" (Custom Agent)
-agent_framework: SB3
-
-# Sets which deep learning framework will be used (by RLlib ONLY).
-# Default is TF (Tensorflow).
-# Options are:
-# "TF" (Tensorflow)
-# TF2 (Tensorflow 2.X)
-# TORCH (PyTorch)
-deep_learning_framework: TF2
-
-# Sets which Agent class will be used.
-# Options are:
-# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
-# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
-# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
-# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
-# "RANDOM" (primaite.agents.simple.RandomAgent)
-# "DUMMY" (primaite.agents.simple.DummyAgent)
-agent_identifier: PPO
-
-# Sets whether Red Agent POL and IER is randomised.
-# Options are:
-# True
-# False
-random_red_agent: False
-
-# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
-# Options are:
-# "BASIC" (The current observation space only)
-# "FULL" (Full environment view with actions taken and reward feedback)
-hard_coded_agent_view: FULL
-
-# Sets How the Action Space is defined:
-# "NODE"
-# "ACL"
-# "ANY" node and acl actions
-action_type: NODE
-# observation space
-observation_space:
-  # flatten: true
-  components:
-    - name: NODE_LINK_TABLE
-    # - name: NODE_STATUSES
-    # - name: LINK_TRAFFIC_LEVELS
-
-
-# Number of episodes for training to run per session
-num_train_episodes: 10
-
-# Number of time_steps for training per episode
-num_train_steps: 256
-
-# Number of episodes for evaluation to run per session
-num_eval_episodes: 10
-
-# Number of time_steps for evaluation per episode
-num_eval_steps: 256
-
-# Sets how often the agent will save a checkpoint (every n time episodes).
-# Set to 0 if no checkpoints are required. Default is 10
-checkpoint_every_n_episodes: 10
-
-# Time delay (milliseconds) between steps for CUSTOM agents.
-time_delay: 5
-
-# Type of session to be run. Options are:
-# "TRAIN" (Trains an agent)
-# "EVAL" (Evaluates an agent)
-# "TRAIN_EVAL" (Trains then evaluates an agent)
-session_type: TRAIN
-
-# Environment config values
-# The high value for the observation space
-observation_space_high_value: 1000000000
-
-# The Stable Baselines3 learn/eval output verbosity level:
-# Options are:
-# "NONE" (No Output)
-# "INFO" (Info Messages (such as devices and wrappers used))
-# "DEBUG" (All Messages)
-sb3_output_verbose_level: NONE
-
-# Reward values
-# Generic
-all_ok: 0
-# Node Hardware State
-off_should_be_on: -10
-off_should_be_resetting: -5
-on_should_be_off: -2
-on_should_be_resetting: -5
-resetting_should_be_on: -5
-resetting_should_be_off: -2
-resetting: -3
-# Node Software or Service State
-good_should_be_patching: 2
-good_should_be_compromised: 5
-good_should_be_overwhelmed: 5
-patching_should_be_good: -5
-patching_should_be_compromised: 2
-patching_should_be_overwhelmed: 2
-patching: -3
-compromised_should_be_good: -20
-compromised_should_be_patching: -20
-compromised_should_be_overwhelmed: -20
-compromised: -20
-overwhelmed_should_be_good: -20
-overwhelmed_should_be_patching: -20
-overwhelmed_should_be_compromised: -20
-overwhelmed: -20
-# Node File System State
-good_should_be_repairing: 2
-good_should_be_restoring: 2
-good_should_be_corrupt: 5
-good_should_be_destroyed: 10
-repairing_should_be_good: -5
-repairing_should_be_restoring: 2
-repairing_should_be_corrupt: 2
-repairing_should_be_destroyed: 0
-repairing: -3
-restoring_should_be_good: -10
-restoring_should_be_repairing: -2
-restoring_should_be_corrupt: 1
-restoring_should_be_destroyed: 2
-restoring: -6
-corrupt_should_be_good: -10
-corrupt_should_be_repairing: -10
-corrupt_should_be_restoring: -10
-corrupt_should_be_destroyed: 2
-corrupt: -10
-destroyed_should_be_good: -20
-destroyed_should_be_repairing: -20
-destroyed_should_be_restoring: -20
-destroyed_should_be_corrupt: -20
-destroyed: -20
-scanning: -2
-# IER status
-red_ier_running: -5
-green_ier_blocked: -10
-
-# Patching / Reset durations
-os_patching_duration: 5            # The time taken to patch the OS
-node_reset_duration: 5             # The time taken to reset a node (hardware)
-service_patching_duration: 5       # The time taken to patch a service
-file_system_repairing_limit: 5      # The time take to repair the file system
-file_system_restoring_limit: 5      # The time take to restore the file system
-file_system_scanning_limit: 5       # The time taken to scan the file system