diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py index 685fe776..4eb398b4 100644 --- a/src/primaite/agents/agent.py +++ b/src/primaite/agents/agent.py @@ -348,8 +348,8 @@ class HardCodedAgentSessionABC(AgentSessionABC): self._env.set_as_eval() # noqa self.is_eval = True - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_eval_steps + episodes = self._training_config.num_eval_episodes obs = self._env.reset() for episode in range(episodes): diff --git a/src/primaite/agents/rllib.py b/src/primaite/agents/rllib.py index d851ba9c..443598e7 100644 --- a/src/primaite/agents/rllib.py +++ b/src/primaite/agents/rllib.py @@ -107,13 +107,13 @@ class RLlibAgent(AgentSessionABC): ), ) - self._agent_config.training(train_batch_size=self._training_config.num_steps) + self._agent_config.training(train_batch_size=self._training_config.num_train_steps) self._agent_config.framework(framework="tf") self._agent_config.rollouts( num_rollout_workers=1, num_envs_per_worker=1, - horizon=self._training_config.num_steps, + horizon=self._training_config.num_train_steps, ) self._agent: Algorithm = self._agent_config.build(logger_creator=_custom_log_creator(self.learning_path)) @@ -121,7 +121,7 @@ class RLlibAgent(AgentSessionABC): checkpoint_n = self._training_config.checkpoint_every_n_episodes episode_count = self._current_result["episodes_total"] if checkpoint_n > 0 and episode_count > 0: - if (episode_count % checkpoint_n == 0) or (episode_count == self._training_config.num_episodes): + if (episode_count % checkpoint_n == 0) or (episode_count == self._training_config.num_train_episodes): self._agent.save(str(self.checkpoints_path)) def learn( @@ -133,8 +133,8 @@ class RLlibAgent(AgentSessionABC): :param kwargs: Any agent-specific key-word args to be passed. 
""" - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_train_steps + episodes = self._training_config.num_train_episodes _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...") for i in range(episodes): diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py index f5ac44cb..17827ff4 100644 --- a/src/primaite/agents/sb3.py +++ b/src/primaite/agents/sb3.py @@ -53,11 +53,12 @@ class SB3Agent(AgentSessionABC): session_path=self.session_path, timestamp_str=self.timestamp_str, ) + self._agent = self._agent_class( PPOMlp, self._env, verbose=self.sb3_output_verbose_level, - n_steps=self._training_config.num_steps, + n_steps=self._training_config.num_eval_steps, tensorboard_log=str(self._tensorboard_log_path), ) @@ -82,8 +83,8 @@ class SB3Agent(AgentSessionABC): :param kwargs: Any agent-specific key-word args to be passed. """ - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_train_steps + episodes = self._training_config.num_train_episodes self.is_eval = False _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...") for i in range(episodes): @@ -104,8 +105,8 @@ class SB3Agent(AgentSessionABC): :param deterministic: Whether the evaluation is deterministic. :param kwargs: Any agent-specific key-word args to be passed. 
""" - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_eval_steps + episodes = self._training_config.num_eval_episodes self._env.set_as_eval() self.is_eval = True if deterministic: diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py index bd73f65b..018fd982 100644 --- a/src/primaite/config/training_config.py +++ b/src/primaite/config/training_config.py @@ -60,11 +60,17 @@ class TrainingConfig: action_type: ActionType = ActionType.ANY "The ActionType to use" - num_episodes: int = 10 - "The number of episodes to train over" + num_train_episodes: int = 10 + "The number of episodes to train over during an training session" - num_steps: int = 256 - "The number of steps in an episode" + num_train_steps: int = 256 + "The number of steps in an episode during an training session" + + num_eval_episodes: int = 10 + "The number of episodes to train over during an evaluation session" + + num_eval_steps: int = 256 + "The number of steps in an episode during an evaluation session" checkpoint_every_n_episodes: int = 5 "The agent will save a checkpoint every n episodes" @@ -230,8 +236,17 @@ class TrainingConfig: tc += f"{self.hard_coded_agent_view}, " tc += f"{self.action_type}, " tc += f"observation_space={self.observation_space}, " - tc += f"{self.num_episodes} episodes @ " - tc += f"{self.num_steps} steps" + if self.session_type.name == "TRAIN": + tc += f"{self.num_train_episodes} episodes @ " + tc += f"{self.num_train_steps} steps" + elif self.session_type.name == "EVAL": + tc += f"{self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" + else: + tc += f"Training: {self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" + tc += f"Evaluation: {self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" return tc @@ -320,7 +335,8 @@ def _get_new_key_from_legacy(legacy_key: str) -> str: """ 
key_mapping = { "agentIdentifier": None, - "numEpisodes": "num_episodes", + "numEpisodes": "num_train_episodes", + "numSteps": "num_train_steps", "timeDelay": "time_delay", "configFilename": None, "sessionType": "session_type", diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index 03c23f93..c7e67e34 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -85,7 +85,12 @@ class Primaite(Env): _LOGGER.info(f"Using: {str(self.training_config)}") # Number of steps in an episode - self.episode_steps = self.training_config.num_steps + if self.training_config.session_type == SessionType.TRAIN: + self.episode_steps = self.training_config.num_train_steps + elif self.training_config.session_type == SessionType.EVAL: + self.episode_steps = self.training_config.num_eval_steps + else: + self.episode_steps = self.training_config.num_train_steps super(Primaite, self).__init__() @@ -254,6 +259,7 @@ class Primaite(Env): self.episode_count = 0 self.step_count = 0 self.total_step_count = 0 + self.episode_steps = self.training_config.num_eval_steps def reset(self): """ diff --git a/tests/config/legacy_conversion/new_training_config.yaml b/tests/config/legacy_conversion/new_training_config.yaml index 49e6a00b..5ca80742 100644 --- a/tests/config/legacy_conversion/new_training_config.yaml +++ b/tests/config/legacy_conversion/new_training_config.yaml @@ -20,10 +20,17 @@ agent_identifier: PPO # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 10 -# Number of time_steps per episode -num_steps: 256 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic 
agents) time_delay: 10 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml index d26d7955..bbdce9c1 100644 --- a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml +++ b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml @@ -22,10 +22,17 @@ agent_identifier: A2C # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: diff --git a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml index aae740b6..41b3e588 100644 --- a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml +++ b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: components: diff --git a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml index 4066eace..34758199 100644 --- a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml +++ 
b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: components: diff --git a/tests/config/obs_tests/main_config_without_obs.yaml b/tests/config/obs_tests/main_config_without_obs.yaml index 08452dda..352e765c 100644 --- a/tests/config/obs_tests/main_config_without_obs.yaml +++ b/tests/config/obs_tests/main_config_without_obs.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/one_node_states_on_off_main_config.yaml b/tests/config/one_node_states_on_off_main_config.yaml index 7f1ced01..63fdd1a5 100644 --- a/tests/config/one_node_states_on_off_main_config.yaml +++ b/tests/config/one_node_states_on_off_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: DUMMY # "ACL" # "ANY" node and acl actions action_type: NODE -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for 
training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml index 97d0ddaf..859b2ab3 100644 --- a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml +++ b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/single_action_space_main_config.yaml b/tests/config/single_action_space_main_config.yaml index 067b9a6d..c875757f 100644 --- a/tests/config/single_action_space_main_config.yaml +++ b/tests/config/single_action_space_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session 
+num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/test_random_red_main_config.yaml b/tests/config/test_random_red_main_config.yaml index 800fe808..e0fc40ee 100644 --- a/tests/config/test_random_red_main_config.yaml +++ b/tests/config/test_random_red_main_config.yaml @@ -28,10 +28,17 @@ random_red_agent: True # "ACL" # "ANY" node and acl actions action_type: NODE -# Number of episodes to run per session -num_episodes: 2 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/config/train_episode_step.yaml b/tests/config/train_episode_step.yaml new file mode 100644 index 00000000..550b95fd --- /dev/null +++ b/tests/config/train_episode_step.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. 
+# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. +# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 30 + +# Number of time_steps for training per episode +num_train_steps: 1 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 10 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: EVAL + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/config/train_eval_check_episode_step.yaml b/tests/config/train_eval_check_episode_step.yaml new file mode 100644 index 00000000..f616116e --- /dev/null +++ b/tests/config/train_eval_check_episode_step.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. +# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
+# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 30 + +# Number of time_steps for training per episode +num_train_steps: 1 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 10 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: TRAIN + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/config/training_config_main.yaml b/tests/config/training_config_main.yaml new file mode 100644 index 00000000..3351d66b --- /dev/null +++ b/tests/config/training_config_main.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. +# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
+# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: TRAIN + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/conftest.py b/tests/conftest.py index af76b314..7e06bea7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -163,8 +163,9 @@ def run_generic(env, config_values): """Run against a generic agent.""" # Reset the environment at the start of the episode # env.reset() - for episode in range(0, config_values.num_episodes): - for step in range(0, config_values.num_steps): + print(config_values.num_train_episodes, "how many episodes") + for episode in range(0, config_values.num_train_episodes): + for step in range(0, config_values.num_train_steps): # Send the observation space to the agent to get an action # TEMP - random action for now # action = env.blue_agent_action(obs) diff --git a/tests/test_single_action_space.py b/tests/test_single_action_space.py index 5d55b9c9..bfcffd42 100644 --- a/tests/test_single_action_space.py +++ b/tests/test_single_action_space.py @@ -12,8 +12,8 @@ def run_generic_set_actions(env: Primaite): # Reset the environment at the start of the episode # env.reset() training_config = env.training_config - for episode in range(0, training_config.num_episodes): - for step in range(0, training_config.num_steps): + for episode in range(0, training_config.num_train_episodes): + for step in range(0, training_config.num_train_steps): # Send the observation space to the agent to get an action # TEMP - random action for now # action = env.blue_agent_action(obs) diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py new file mode 100644 
index 00000000..fad30f1b --- /dev/null +++ b/tests/test_train_eval_episode_steps.py @@ -0,0 +1,46 @@ +import pytest + +from primaite import getLogger +from primaite.config.lay_down_config import dos_very_basic_config_path +from tests import TEST_CONFIG_ROOT +from tests.conftest import run_generic + +_LOGGER = getLogger(__name__) + + +@pytest.mark.parametrize( + "temp_primaite_session", + [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]], + indirect=True, +) +def test_eval_steps_differ_from_training(temp_primaite_session): + """Uses PrimaiteSession class to compare the number of steps used for training and evaluation.""" + with temp_primaite_session as train_session: + env = train_session.env + train_session.learn() + + """ + Train_episode_step.yaml main config: + num_train_steps = 1 + num_eval_steps = 10 + + When the YAML file changes from TRAIN to EVAL the episode step changes and uses the other config value. + + The test is showing that 10 steps are running for evaluation and NOT 1 step as EVAL has been selected in the config. + """ + assert env.episode_steps == 10 + # assert env.actual_episode_count == 10 # should be 10 + + +@pytest.mark.parametrize( + "temp_primaite_session", + [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]], + indirect=True, +) +def test_train_eval_config_option(temp_primaite_session): + """Uses PrimaiteSession class to test number of episodes and steps used for TRAIN and EVAL option.""" + with temp_primaite_session as train_session: + env = train_session.env + run_generic(env, env.training_config) + + print(env.actual_episode_count, env.step_count, env.total_step_count)