From 4371ca13fc0b655aeb2d4d20b365ef400f5db68b Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Thu, 6 Jul 2023 11:12:51 +0100 Subject: [PATCH 1/9] 1566 - added train_episodes, train_steps, eval_episodes and eval_steps to training_config_main.yaml --- .../training/training_config_main.yaml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/primaite/config/_package_data/training/training_config_main.yaml b/src/primaite/config/_package_data/training/training_config_main.yaml index a638fe14..dc94e3bb 100644 --- a/src/primaite/config/_package_data/training/training_config_main.yaml +++ b/src/primaite/config/_package_data/training/training_config_main.yaml @@ -49,11 +49,19 @@ observation_space: - name: NODE_LINK_TABLE # - name: NODE_STATUSES # - name: LINK_TRAFFIC_LEVELS -# Number of episodes to run per session -num_episodes: 10 -# Number of time_steps per episode -num_steps: 256 + +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Sets how often the agent will save a checkpoint (every n time episodes). # Set to 0 if no checkpoints are required. 
Default is 10 From e03c29b921705e127b6cd1d844af9df4927950f7 Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Fri, 7 Jul 2023 14:13:47 +0100 Subject: [PATCH 2/9] 1566 - added test file and edited configs to include types of num steps and modified agents to use correct step and episode counts --- src/primaite/agents/agent.py | 4 +- src/primaite/agents/rllib.py | 10 +- src/primaite/agents/sb3.py | 11 +- src/primaite/config/training_config.py | 30 +++- src/primaite/environment/primaite_env.py | 8 +- .../new_training_config.yaml | 15 +- .../main_config_LINK_TRAFFIC_LEVELS.yaml | 15 +- .../main_config_NODE_LINK_TABLE.yaml | 15 +- .../obs_tests/main_config_NODE_STATUSES.yaml | 15 +- .../obs_tests/main_config_without_obs.yaml | 15 +- .../one_node_states_on_off_main_config.yaml | 15 +- ..._space_fixed_blue_actions_main_config.yaml | 15 +- .../single_action_space_main_config.yaml | 15 +- tests/config/test_random_red_main_config.yaml | 15 +- tests/config/train_episode_step.yaml | 153 ++++++++++++++++++ .../config/train_eval_check_episode_step.yaml | 153 ++++++++++++++++++ tests/config/training_config_main.yaml | 153 ++++++++++++++++++ tests/conftest.py | 5 +- tests/test_single_action_space.py | 4 +- tests/test_train_eval_episode_steps.py | 46 ++++++ 20 files changed, 652 insertions(+), 60 deletions(-) create mode 100644 tests/config/train_episode_step.yaml create mode 100644 tests/config/train_eval_check_episode_step.yaml create mode 100644 tests/config/training_config_main.yaml create mode 100644 tests/test_train_eval_episode_steps.py diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py index 685fe776..4eb398b4 100644 --- a/src/primaite/agents/agent.py +++ b/src/primaite/agents/agent.py @@ -348,8 +348,8 @@ class HardCodedAgentSessionABC(AgentSessionABC): self._env.set_as_eval() # noqa self.is_eval = True - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_eval_steps + 
episodes = self._training_config.num_eval_episodes obs = self._env.reset() for episode in range(episodes): diff --git a/src/primaite/agents/rllib.py b/src/primaite/agents/rllib.py index d851ba9c..443598e7 100644 --- a/src/primaite/agents/rllib.py +++ b/src/primaite/agents/rllib.py @@ -107,13 +107,13 @@ class RLlibAgent(AgentSessionABC): ), ) - self._agent_config.training(train_batch_size=self._training_config.num_steps) + self._agent_config.training(train_batch_size=self._training_config.num_train_steps) self._agent_config.framework(framework="tf") self._agent_config.rollouts( num_rollout_workers=1, num_envs_per_worker=1, - horizon=self._training_config.num_steps, + horizon=self._training_config.num_train_steps, ) self._agent: Algorithm = self._agent_config.build(logger_creator=_custom_log_creator(self.learning_path)) @@ -121,7 +121,7 @@ class RLlibAgent(AgentSessionABC): checkpoint_n = self._training_config.checkpoint_every_n_episodes episode_count = self._current_result["episodes_total"] if checkpoint_n > 0 and episode_count > 0: - if (episode_count % checkpoint_n == 0) or (episode_count == self._training_config.num_episodes): + if (episode_count % checkpoint_n == 0) or (episode_count == self._training_config.num_train_episodes): self._agent.save(str(self.checkpoints_path)) def learn( @@ -133,8 +133,8 @@ class RLlibAgent(AgentSessionABC): :param kwargs: Any agent-specific key-word args to be passed. 
""" - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_train_steps + episodes = self._training_config.num_train_episodes _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...") for i in range(episodes): diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py index f5ac44cb..17827ff4 100644 --- a/src/primaite/agents/sb3.py +++ b/src/primaite/agents/sb3.py @@ -53,11 +53,12 @@ class SB3Agent(AgentSessionABC): session_path=self.session_path, timestamp_str=self.timestamp_str, ) + self._agent = self._agent_class( PPOMlp, self._env, verbose=self.sb3_output_verbose_level, - n_steps=self._training_config.num_steps, + n_steps=self._training_config.num_eval_steps, tensorboard_log=str(self._tensorboard_log_path), ) @@ -82,8 +83,8 @@ class SB3Agent(AgentSessionABC): :param kwargs: Any agent-specific key-word args to be passed. """ - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_train_steps + episodes = self._training_config.num_train_episodes self.is_eval = False _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...") for i in range(episodes): @@ -104,8 +105,8 @@ class SB3Agent(AgentSessionABC): :param deterministic: Whether the evaluation is deterministic. :param kwargs: Any agent-specific key-word args to be passed. 
""" - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_eval_steps + episodes = self._training_config.num_eval_episodes self._env.set_as_eval() self.is_eval = True if deterministic: diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py index bd73f65b..018fd982 100644 --- a/src/primaite/config/training_config.py +++ b/src/primaite/config/training_config.py @@ -60,11 +60,17 @@ class TrainingConfig: action_type: ActionType = ActionType.ANY "The ActionType to use" - num_episodes: int = 10 - "The number of episodes to train over" + num_train_episodes: int = 10 + "The number of episodes to train over during an training session" - num_steps: int = 256 - "The number of steps in an episode" + num_train_steps: int = 256 + "The number of steps in an episode during an training session" + + num_eval_episodes: int = 10 + "The number of episodes to train over during an evaluation session" + + num_eval_steps: int = 256 + "The number of steps in an episode during an evaluation session" checkpoint_every_n_episodes: int = 5 "The agent will save a checkpoint every n episodes" @@ -230,8 +236,17 @@ class TrainingConfig: tc += f"{self.hard_coded_agent_view}, " tc += f"{self.action_type}, " tc += f"observation_space={self.observation_space}, " - tc += f"{self.num_episodes} episodes @ " - tc += f"{self.num_steps} steps" + if self.session_type.name == "TRAIN": + tc += f"{self.num_train_episodes} episodes @ " + tc += f"{self.num_train_steps} steps" + elif self.session_type.name == "EVAL": + tc += f"{self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" + else: + tc += f"Training: {self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" + tc += f"Evaluation: {self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" return tc @@ -320,7 +335,8 @@ def _get_new_key_from_legacy(legacy_key: str) -> str: """ 
key_mapping = { "agentIdentifier": None, - "numEpisodes": "num_episodes", + "numEpisodes": "num_train_episodes", + "numSteps": "num_train_steps", "timeDelay": "time_delay", "configFilename": None, "sessionType": "session_type", diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index 03c23f93..c7e67e34 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -85,7 +85,12 @@ class Primaite(Env): _LOGGER.info(f"Using: {str(self.training_config)}") # Number of steps in an episode - self.episode_steps = self.training_config.num_steps + if self.training_config.session_type == SessionType.TRAIN: + self.episode_steps = self.training_config.num_train_steps + elif self.training_config.session_type == SessionType.EVAL: + self.episode_steps = self.training_config.num_eval_steps + else: + self.episode_steps = self.training_config.num_train_steps super(Primaite, self).__init__() @@ -254,6 +259,7 @@ class Primaite(Env): self.episode_count = 0 self.step_count = 0 self.total_step_count = 0 + self.episode_steps = self.training_config.num_eval_steps def reset(self): """ diff --git a/tests/config/legacy_conversion/new_training_config.yaml b/tests/config/legacy_conversion/new_training_config.yaml index 49e6a00b..5ca80742 100644 --- a/tests/config/legacy_conversion/new_training_config.yaml +++ b/tests/config/legacy_conversion/new_training_config.yaml @@ -20,10 +20,17 @@ agent_identifier: PPO # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 10 -# Number of time_steps per episode -num_steps: 256 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic 
agents) time_delay: 10 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml index d26d7955..bbdce9c1 100644 --- a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml +++ b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml @@ -22,10 +22,17 @@ agent_identifier: A2C # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: diff --git a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml index aae740b6..41b3e588 100644 --- a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml +++ b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: components: diff --git a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml index 4066eace..34758199 100644 --- a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml +++ 
b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: components: diff --git a/tests/config/obs_tests/main_config_without_obs.yaml b/tests/config/obs_tests/main_config_without_obs.yaml index 08452dda..352e765c 100644 --- a/tests/config/obs_tests/main_config_without_obs.yaml +++ b/tests/config/obs_tests/main_config_without_obs.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/one_node_states_on_off_main_config.yaml b/tests/config/one_node_states_on_off_main_config.yaml index 7f1ced01..63fdd1a5 100644 --- a/tests/config/one_node_states_on_off_main_config.yaml +++ b/tests/config/one_node_states_on_off_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: DUMMY # "ACL" # "ANY" node and acl actions action_type: NODE -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for 
training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml index 97d0ddaf..859b2ab3 100644 --- a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml +++ b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/single_action_space_main_config.yaml b/tests/config/single_action_space_main_config.yaml index 067b9a6d..c875757f 100644 --- a/tests/config/single_action_space_main_config.yaml +++ b/tests/config/single_action_space_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session 
+num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/test_random_red_main_config.yaml b/tests/config/test_random_red_main_config.yaml index 800fe808..e0fc40ee 100644 --- a/tests/config/test_random_red_main_config.yaml +++ b/tests/config/test_random_red_main_config.yaml @@ -28,10 +28,17 @@ random_red_agent: True # "ACL" # "ANY" node and acl actions action_type: NODE -# Number of episodes to run per session -num_episodes: 2 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/config/train_episode_step.yaml b/tests/config/train_episode_step.yaml new file mode 100644 index 00000000..550b95fd --- /dev/null +++ b/tests/config/train_episode_step.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. 
+# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. +# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 30 + +# Number of time_steps for training per episode +num_train_steps: 1 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 10 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: EVAL + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/config/train_eval_check_episode_step.yaml b/tests/config/train_eval_check_episode_step.yaml new file mode 100644 index 00000000..f616116e --- /dev/null +++ b/tests/config/train_eval_check_episode_step.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. +# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
+# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 30 + +# Number of time_steps for training per episode +num_train_steps: 1 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 10 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: TRAIN + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/config/training_config_main.yaml b/tests/config/training_config_main.yaml new file mode 100644 index 00000000..3351d66b --- /dev/null +++ b/tests/config/training_config_main.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. +# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
+# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: TRAIN + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/conftest.py b/tests/conftest.py index af76b314..7e06bea7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -163,8 +163,9 @@ def run_generic(env, config_values): """Run against a generic agent.""" # Reset the environment at the start of the episode # env.reset() - for episode in range(0, config_values.num_episodes): - for step in range(0, config_values.num_steps): + print(config_values.num_train_episodes, "how many episodes") + for episode in range(0, config_values.num_train_episodes): + for step in range(0, config_values.num_train_steps): # Send the observation space to the agent to get an action # TEMP - random action for now # action = env.blue_agent_action(obs) diff --git a/tests/test_single_action_space.py b/tests/test_single_action_space.py index 5d55b9c9..bfcffd42 100644 --- a/tests/test_single_action_space.py +++ b/tests/test_single_action_space.py @@ -12,8 +12,8 @@ def run_generic_set_actions(env: Primaite): # Reset the environment at the start of the episode # env.reset() training_config = env.training_config - for episode in range(0, training_config.num_episodes): - for step in range(0, training_config.num_steps): + for episode in range(0, training_config.num_train_episodes): + for step in range(0, training_config.num_train_steps): # Send the observation space to the agent to get an action # TEMP - random action for now # action = env.blue_agent_action(obs) diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py new file mode 100644 
index 00000000..fad30f1b --- /dev/null +++ b/tests/test_train_eval_episode_steps.py @@ -0,0 +1,46 @@ +import pytest + +from primaite import getLogger +from primaite.config.lay_down_config import dos_very_basic_config_path +from tests import TEST_CONFIG_ROOT +from tests.conftest import run_generic + +_LOGGER = getLogger(__name__) + + +@pytest.mark.parametrize( + "temp_primaite_session", + [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]], + indirect=True, +) +def test_eval_steps_differ_from_training(temp_primaite_session): + """Uses PrimaiteSession class to compare number of episodes used for training and evaluation.""" + with temp_primaite_session as train_session: + env = train_session.env + train_session.learn() + + """ + Train_episode_step.yaml main config: + num_train_steps = 1 + num_eval_steps = 10 + + When the YAML file changes from TRAIN to EVAL the episode step changes and uses the other config value. + + The test is showing that 10 steps are running for evaluation and NOT 1 step as EVAL has been selected in the config. + """ + assert env.episode_steps == 10 # 30 + # assert env.actual_episode_count == 10 # should be 10 + + +@pytest.mark.parametrize( + "temp_primaite_session", + [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]], + indirect=True, +) +def test_train_eval_config_option(temp_primaite_session): + """Uses PrimaiteSession class to test number of episodes and steps used for TRAIN and EVAL option.""" + with temp_primaite_session as train_session: + env = train_session.env + run_generic(env, env.training_config) + + print(env.actual_episode_count, env.step_count, env.total_step_count) From 40381833d3bae56ae95be5550e0d6c44992017bc Mon Sep 17 00:00:00 2001 From: Chris McCarthy Date: Fri, 7 Jul 2023 15:50:14 +0100 Subject: [PATCH 3/9] #1566 - Refactored the test_train_eval_episode_steps.py to use TempPrimaiteSession. - Fixed all errors that were caused by fixing the above.
- Some tests still fail, these are for SS to fix. - Dropped the old run_generic stuff from conftest.py --- src/primaite/agents/agent.py | 14 +- src/primaite/agents/rllib.py | 9 +- src/primaite/agents/sb3.py | 6 +- .../training/training_config_main.yaml | 2 +- src/primaite/config/training_config.py | 6 +- src/primaite/environment/primaite_env.py | 10 +- src/primaite/environment/reward.py | 1 - src/primaite/utils/session_output_reader.py | 5 +- ...ne_node_states_on_off_lay_down_config.yaml | 5 - .../one_node_states_on_off_main_config.yaml | 8 +- tests/config/train_episode_step.yaml | 12 +- .../config/train_eval_check_episode_step.yaml | 153 ------------------ tests/conftest.py | 67 +------- tests/test_reward.py | 4 +- tests/test_train_eval_episode_steps.py | 46 +++--- 15 files changed, 69 insertions(+), 279 deletions(-) delete mode 100644 tests/config/train_eval_check_episode_step.yaml diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py index 2cdb242b..883e844b 100644 --- a/src/primaite/agents/agent.py +++ b/src/primaite/agents/agent.py @@ -153,12 +153,11 @@ class AgentSessionABC(ABC): metadata_dict = json.load(file) metadata_dict["end_datetime"] = datetime.now().isoformat() - if not self.is_eval: - metadata_dict["learning"]["total_episodes"] = self._env.episode_count # noqa + metadata_dict["learning"]["total_episodes"] = self._env.actual_episode_count # noqa metadata_dict["learning"]["total_time_steps"] = self._env.total_step_count # noqa else: - metadata_dict["evaluation"]["total_episodes"] = self._env.episode_count # noqa + metadata_dict["evaluation"]["total_episodes"] = self._env.actual_episode_count # noqa metadata_dict["evaluation"]["total_time_steps"] = self._env.total_step_count # noqa filepath = self.session_path / "session_metadata.json" @@ -209,10 +208,11 @@ class AgentSessionABC(ABC): :param kwargs: Any agent-specific key-word args to be passed. 
""" - self._env.set_as_eval() # noqa - self.is_eval = True - self._plot_av_reward_per_episode(learning_session=False) - _LOGGER.info("Finished evaluation") + if self._can_evaluate: + self._plot_av_reward_per_episode(learning_session=False) + self._update_session_metadata_file() + self.is_eval = True + _LOGGER.info("Finished evaluation") @abstractmethod def _get_latest_checkpoint(self): diff --git a/src/primaite/agents/rllib.py b/src/primaite/agents/rllib.py index 28d21e20..7067f6a6 100644 --- a/src/primaite/agents/rllib.py +++ b/src/primaite/agents/rllib.py @@ -85,8 +85,12 @@ class RLlibAgent(AgentSessionABC): metadata_dict = json.load(file) metadata_dict["end_datetime"] = datetime.now().isoformat() - metadata_dict["total_episodes"] = self._current_result["episodes_total"] - metadata_dict["total_time_steps"] = self._current_result["timesteps_total"] + if not self.is_eval: + metadata_dict["learning"]["total_episodes"] = self._current_result["episodes_total"] # noqa + metadata_dict["learning"]["total_time_steps"] = self._current_result["timesteps_total"] # noqa + else: + metadata_dict["evaluation"]["total_episodes"] = self._current_result["episodes_total"] # noqa + metadata_dict["evaluation"]["total_time_steps"] = self._current_result["timesteps_total"] # noqa filepath = self.session_path / "session_metadata.json" _LOGGER.debug(f"Updating Session Metadata file: {filepath}") @@ -150,7 +154,6 @@ class RLlibAgent(AgentSessionABC): super().learn() - def evaluate( self, **kwargs, diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py index 00983140..dc049e91 100644 --- a/src/primaite/agents/sb3.py +++ b/src/primaite/agents/sb3.py @@ -58,7 +58,7 @@ class SB3Agent(AgentSessionABC): PPOMlp, self._env, verbose=self.sb3_output_verbose_level, - n_steps=self._training_config.num_eval_steps, + n_steps=self._training_config.num_train_steps, tensorboard_log=str(self._tensorboard_log_path), seed=self._training_config.seed, ) @@ -93,7 +93,7 @@ class 
SB3Agent(AgentSessionABC): for i in range(episodes): self._agent.learn(total_timesteps=time_steps) self._save_checkpoint() - self._env.reset() + self._env._write_av_reward_per_episode() # noqa self.save() self._env.close() super().learn() @@ -129,7 +129,7 @@ class SB3Agent(AgentSessionABC): if isinstance(action, np.ndarray): action = np.int64(action) obs, rewards, done, info = self._env.step(action) - self._env.reset() + self._env._write_av_reward_per_episode() # noqa self._env.close() super().evaluate() diff --git a/src/primaite/config/_package_data/training/training_config_main.yaml b/src/primaite/config/_package_data/training/training_config_main.yaml index f45f976a..61c45758 100644 --- a/src/primaite/config/_package_data/training/training_config_main.yaml +++ b/src/primaite/config/_package_data/training/training_config_main.yaml @@ -68,7 +68,7 @@ num_train_episodes: 10 num_train_steps: 256 # Number of episodes for evaluation to run per session -num_eval_episodes: 10 +num_eval_episodes: 1 # Number of time_steps for evaluation per episode num_eval_steps: 256 diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py index 2b46e513..5bbe881b 100644 --- a/src/primaite/config/training_config.py +++ b/src/primaite/config/training_config.py @@ -66,7 +66,7 @@ class TrainingConfig: num_train_steps: int = 256 "The number of steps in an episode during an training session" - num_eval_episodes: int = 10 + num_eval_episodes: int = 1 "The number of episodes to train over during an evaluation session" num_eval_steps: int = 256 @@ -242,10 +242,10 @@ class TrainingConfig: tc += f"{self.hard_coded_agent_view}, " tc += f"{self.action_type}, " tc += f"observation_space={self.observation_space}, " - if self.session_type.name == "TRAIN": + if self.session_type is SessionType.TRAIN: tc += f"{self.num_train_episodes} episodes @ " tc += f"{self.num_train_steps} steps" - elif self.session_type.name == "EVAL": + elif self.session_type is SessionType.EVAL: 
tc += f"{self.num_eval_episodes} episodes @ " tc += f"{self.num_eval_steps} steps" else: diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index 18cf8767..ed6eefb2 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -261,6 +261,11 @@ class Primaite(Env): self.total_step_count = 0 self.episode_steps = self.training_config.num_eval_steps + def _write_av_reward_per_episode(self): + if self.actual_episode_count > 0: + csv_data = self.actual_episode_count, self.average_reward + self.episode_av_reward_writer.write(csv_data) + def reset(self): """ AI Gym Reset function. @@ -268,10 +273,7 @@ class Primaite(Env): Returns: Environment observation space (reset) """ - if self.actual_episode_count > 0: - csv_data = self.actual_episode_count, self.average_reward - self.episode_av_reward_writer.write(csv_data) - + self._write_av_reward_per_episode() self.episode_count += 1 # Don't need to reset links, as they are cleared and recalculated every diff --git a/src/primaite/environment/reward.py b/src/primaite/environment/reward.py index e4353cb9..9cbb0078 100644 --- a/src/primaite/environment/reward.py +++ b/src/primaite/environment/reward.py @@ -90,7 +90,6 @@ def calculate_reward_function( f"Penalty of {ier_reward} was NOT applied." ) ) - return reward_value diff --git a/src/primaite/utils/session_output_reader.py b/src/primaite/utils/session_output_reader.py index d04f375e..eb7a7675 100644 --- a/src/primaite/utils/session_output_reader.py +++ b/src/primaite/utils/session_output_reader.py @@ -16,5 +16,6 @@ def av_rewards_dict(av_rewards_csv_file: Union[str, Path]) -> Dict[int, float]: :param av_rewards_csv_file: The average rewards per episode csv file path. :return: The average rewards per episode cdv as a dict. 
""" - d = pl.read_csv(av_rewards_csv_file).to_dict() - return {v: d["Average Reward"][i] for i, v in enumerate(d["Episode"])} + df = pl.read_csv(av_rewards_csv_file).to_dict() + + return {v: df["Average Reward"][i] for i, v in enumerate(df["Episode"])} diff --git a/tests/config/one_node_states_on_off_lay_down_config.yaml b/tests/config/one_node_states_on_off_lay_down_config.yaml index 996cf368..aadbd449 100644 --- a/tests/config/one_node_states_on_off_lay_down_config.yaml +++ b/tests/config/one_node_states_on_off_lay_down_config.yaml @@ -18,11 +18,6 @@ - name: ftp port: '21' state: GOOD -- item_type: POSITION - positions: - - node: '1' - x_pos: 309 - y_pos: 78 - item_type: RED_POL id: '1' start_step: 1 diff --git a/tests/config/one_node_states_on_off_main_config.yaml b/tests/config/one_node_states_on_off_main_config.yaml index 63fdd1a5..dd425a8c 100644 --- a/tests/config/one_node_states_on_off_main_config.yaml +++ b/tests/config/one_node_states_on_off_main_config.yaml @@ -22,17 +22,13 @@ agent_identifier: DUMMY # "ACL" # "ANY" node and acl actions action_type: NODE -# Number of episodes for training to run per session -num_train_episodes: 10 -# Number of time_steps for training per episode -num_train_steps: 256 # Number of episodes for evaluation to run per session -num_eval_episodes: 10 +num_eval_episodes: 1 # Number of time_steps for evaluation per episode -num_eval_steps: 256 +num_eval_steps: 15 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/config/train_episode_step.yaml b/tests/config/train_episode_step.yaml index 550b95fd..f112b741 100644 --- a/tests/config/train_episode_step.yaml +++ b/tests/config/train_episode_step.yaml @@ -52,20 +52,20 @@ observation_space: # Number of episodes for training to run per session -num_train_episodes: 30 +num_train_episodes: 3 # Number of time_steps for training per episode -num_train_steps: 1 +num_train_steps: 25 # Number of episodes for evaluation to run per session -num_eval_episodes: 10 
+num_eval_episodes: 1 # Number of time_steps for evaluation per episode -num_eval_steps: 10 +num_eval_steps: 17 # Sets how often the agent will save a checkpoint (every n time episodes). # Set to 0 if no checkpoints are required. Default is 10 -checkpoint_every_n_episodes: 10 +checkpoint_every_n_episodes: 0 # Time delay (milliseconds) between steps for CUSTOM agents. time_delay: 5 @@ -74,7 +74,7 @@ time_delay: 5 # "TRAIN" (Trains an agent) # "EVAL" (Evaluates an agent) # "TRAIN_EVAL" (Trains then evaluates an agent) -session_type: EVAL +session_type: TRAIN_EVAL # Environment config values # The high value for the observation space diff --git a/tests/config/train_eval_check_episode_step.yaml b/tests/config/train_eval_check_episode_step.yaml deleted file mode 100644 index f616116e..00000000 --- a/tests/config/train_eval_check_episode_step.yaml +++ /dev/null @@ -1,153 +0,0 @@ -# Training Config File - -# Sets which agent algorithm framework will be used. -# Options are: -# "SB3" (Stable Baselines3) -# "RLLIB" (Ray RLlib) -# "CUSTOM" (Custom Agent) -agent_framework: SB3 - -# Sets which deep learning framework will be used (by RLlib ONLY). -# Default is TF (Tensorflow). -# Options are: -# "TF" (Tensorflow) -# TF2 (Tensorflow 2.X) -# TORCH (PyTorch) -deep_learning_framework: TF2 - -# Sets which Agent class will be used. -# Options are: -# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) -# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) -# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) -# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) -# "RANDOM" (primaite.agents.simple.RandomAgent) -# "DUMMY" (primaite.agents.simple.DummyAgent) -agent_identifier: PPO - -# Sets whether Red Agent POL and IER is randomised. -# Options are: -# True -# False -random_red_agent: False - -# Sets what view of the environment the deterministic hardcoded agent has. 
The default is BASIC. -# Options are: -# "BASIC" (The current observation space only) -# "FULL" (Full environment view with actions taken and reward feedback) -hard_coded_agent_view: FULL - -# Sets How the Action Space is defined: -# "NODE" -# "ACL" -# "ANY" node and acl actions -action_type: NODE -# observation space -observation_space: - # flatten: true - components: - - name: NODE_LINK_TABLE - # - name: NODE_STATUSES - # - name: LINK_TRAFFIC_LEVELS - - -# Number of episodes for training to run per session -num_train_episodes: 30 - -# Number of time_steps for training per episode -num_train_steps: 1 - -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 10 - -# Sets how often the agent will save a checkpoint (every n time episodes). -# Set to 0 if no checkpoints are required. Default is 10 -checkpoint_every_n_episodes: 10 - -# Time delay (milliseconds) between steps for CUSTOM agents. -time_delay: 5 - -# Type of session to be run. 
Options are: -# "TRAIN" (Trains an agent) -# "EVAL" (Evaluates an agent) -# "TRAIN_EVAL" (Trains then evaluates an agent) -session_type: TRAIN - -# Environment config values -# The high value for the observation space -observation_space_high_value: 1000000000 - -# The Stable Baselines3 learn/eval output verbosity level: -# Options are: -# "NONE" (No Output) -# "INFO" (Info Messages (such as devices and wrappers used)) -# "DEBUG" (All Messages) -sb3_output_verbose_level: NONE - -# Reward values -# Generic -all_ok: 0 -# Node Hardware State -off_should_be_on: -10 -off_should_be_resetting: -5 -on_should_be_off: -2 -on_should_be_resetting: -5 -resetting_should_be_on: -5 -resetting_should_be_off: -2 -resetting: -3 -# Node Software or Service State -good_should_be_patching: 2 -good_should_be_compromised: 5 -good_should_be_overwhelmed: 5 -patching_should_be_good: -5 -patching_should_be_compromised: 2 -patching_should_be_overwhelmed: 2 -patching: -3 -compromised_should_be_good: -20 -compromised_should_be_patching: -20 -compromised_should_be_overwhelmed: -20 -compromised: -20 -overwhelmed_should_be_good: -20 -overwhelmed_should_be_patching: -20 -overwhelmed_should_be_compromised: -20 -overwhelmed: -20 -# Node File System State -good_should_be_repairing: 2 -good_should_be_restoring: 2 -good_should_be_corrupt: 5 -good_should_be_destroyed: 10 -repairing_should_be_good: -5 -repairing_should_be_restoring: 2 -repairing_should_be_corrupt: 2 -repairing_should_be_destroyed: 0 -repairing: -3 -restoring_should_be_good: -10 -restoring_should_be_repairing: -2 -restoring_should_be_corrupt: 1 -restoring_should_be_destroyed: 2 -restoring: -6 -corrupt_should_be_good: -10 -corrupt_should_be_repairing: -10 -corrupt_should_be_restoring: -10 -corrupt_should_be_destroyed: 2 -corrupt: -10 -destroyed_should_be_good: -20 -destroyed_should_be_repairing: -20 -destroyed_should_be_restoring: -20 -destroyed_should_be_corrupt: -20 -destroyed: -20 -scanning: -2 -# IER status -red_ier_running: -5 
-green_ier_blocked: -10 - -# Patching / Reset durations -os_patching_duration: 5 # The time taken to patch the OS -node_reset_duration: 5 # The time taken to reset a node (hardware) -service_patching_duration: 5 # The time taken to patch a service -file_system_repairing_limit: 5 # The time take to repair the file system -file_system_restoring_limit: 5 # The time take to restore the file system -file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/conftest.py b/tests/conftest.py index 2d78f61d..aaf4dbce 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,17 +1,16 @@ # Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence. import datetime +import json import shutil import tempfile -import time from datetime import datetime from pathlib import Path -from typing import Dict, Union +from typing import Any, Dict, Union from unittest.mock import patch import pytest from primaite import getLogger -from primaite.common.enums import AgentIdentifier from primaite.environment.primaite_env import Primaite from primaite.primaite_session import PrimaiteSession from primaite.utils.session_output_reader import av_rewards_dict @@ -48,6 +47,11 @@ class TempPrimaiteSession(PrimaiteSession): csv_file = f"average_reward_per_episode_{self.timestamp_str}.csv" return av_rewards_dict(self.evaluation_path / csv_file) + def metadata_file_as_dict(self) -> Dict[str, Any]: + """Read the session_metadata.json file and return as a dict.""" + with open(self.session_path / "session_metadata.json", "r") as file: + return json.load(file) + @property def env(self) -> Primaite: """Direct access to the env for ease of testing.""" @@ -58,6 +62,7 @@ class TempPrimaiteSession(PrimaiteSession): def __exit__(self, type, value, tb): shutil.rmtree(self.session_path) + shutil.rmtree(self.session_path.parent) _LOGGER.debug(f"Deleted temp session directory: {self.session_path}") @@ -129,59 +134,3 @@ def temp_session_path() -> Path: 
session_path.mkdir(exist_ok=True, parents=True) return session_path - - -def _get_primaite_env_from_config( - training_config_path: Union[str, Path], - lay_down_config_path: Union[str, Path], - temp_session_path, -): - """Takes a config path and returns the created instance of Primaite.""" - session_timestamp: datetime = datetime.now() - session_path = temp_session_path(session_timestamp) - - timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S") - env = Primaite( - training_config_path=training_config_path, - lay_down_config_path=lay_down_config_path, - session_path=session_path, - timestamp_str=timestamp_str, - ) - config_values = env.training_config - config_values.num_steps = env.episode_steps - - # TOOD: This needs t be refactored to happen outside. Should be part of - # a main Session class. - if env.training_config.agent_identifier is AgentIdentifier.RANDOM: - run_generic(env, config_values) - - return env - - -def run_generic(env, config_values): - """Run against a generic agent.""" - # Reset the environment at the start of the episode - # env.reset() - print(config_values.num_train_episodes, "how many episodes") - for episode in range(0, config_values.num_train_episodes): - for step in range(0, config_values.num_train_steps): - # Send the observation space to the agent to get an action - # TEMP - random action for now - # action = env.blue_agent_action(obs) - # action = env.action_space.sample() - action = 0 - - # Run the simulation step on the live environment - obs, reward, done, info = env.step(action) - - # Break if done is True - if done: - break - - # Introduce a delay between steps - time.sleep(config_values.time_delay / 1000) - - # Reset the environment at the end of the episode - # env.reset() - - # env.close() diff --git a/tests/test_reward.py b/tests/test_reward.py index 81437860..d1b56671 100644 --- a/tests/test_reward.py +++ b/tests/test_reward.py @@ -1,7 +1,10 @@ import pytest +from primaite import getLogger from tests import 
TEST_CONFIG_ROOT +_LOGGER = getLogger(__name__) + @pytest.mark.parametrize( "temp_primaite_session", @@ -44,7 +47,6 @@ def test_rewards_are_being_penalised_at_each_step_function( Average Reward: -8 (-120 / 15) """ with temp_primaite_session as session: - session.evaluate() session.close() ev_rewards = session.eval_av_reward_per_episode_csv() assert ev_rewards[1] == -8.0 diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py index fad30f1b..daa93055 100644 --- a/tests/test_train_eval_episode_steps.py +++ b/tests/test_train_eval_episode_steps.py @@ -3,7 +3,6 @@ import pytest from primaite import getLogger from primaite.config.lay_down_config import dos_very_basic_config_path from tests import TEST_CONFIG_ROOT -from tests.conftest import run_generic _LOGGER = getLogger(__name__) @@ -14,33 +13,30 @@ _LOGGER = getLogger(__name__) indirect=True, ) def test_eval_steps_differ_from_training(temp_primaite_session): - """Uses PrimaiteSession class to compare number of episodes used for training and evaluation.""" - with temp_primaite_session as train_session: - env = train_session.env - train_session.learn() + """Uses PrimaiteSession class to compare number of episodes used for training and evaluation. - """ Train_episode_step.yaml main config: - num_train_steps = 1 - num_eval_steps = 10 - - When the YAML file changes from TRAIN to EVAL the episode step changes and uses the other config value. - - The test is showing that 10 steps are running for evaluation and NOT 1 step as EVAL has been selected in the config. 
+ num_train_steps = 25 + num_train_episodes = 3 + num_eval_steps = 17 + num_eval_episodes = 1 """ - assert env.episode_steps == 10 # 30 - # assert env.actual_episode_count == 10 # should be 10 + expected_learning_metadata = {"total_episodes": 3, "total_time_steps": 75} + expected_evaluation_metadata = {"total_episodes": 1, "total_time_steps": 17} + with temp_primaite_session as session: + # Run learning and check episode and step counts + session.learn() + assert session.env.actual_episode_count == expected_learning_metadata["total_episodes"] + assert session.env.total_step_count == expected_learning_metadata["total_time_steps"] -@pytest.mark.parametrize( - "temp_primaite_session", - [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]], - indirect=True, -) -def test_train_eval_config_option(temp_primaite_session): - """Uses PrimaiteSession class to test number of episodes and steps used for TRAIN and EVAL option.""" - with temp_primaite_session as train_session: - env = train_session.env - run_generic(env, env.training_config) + # Run evaluation and check episode and step counts + session.evaluate() + assert session.env.actual_episode_count == expected_evaluation_metadata["total_episodes"] + assert session.env.total_step_count == expected_evaluation_metadata["total_time_steps"] - print(env.actual_episode_count, env.step_count, env.total_step_count) + # Load the session_metadata.json file and check that the both the + # learning and evaluation match what is expected above + metadata = session.metadata_file_as_dict() + assert metadata["learning"] == expected_learning_metadata + assert metadata["evaluation"] == expected_evaluation_metadata From 41fab6562e8be6312256b597b7519815ae57938c Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Fri, 7 Jul 2023 16:26:12 +0100 Subject: [PATCH 4/9] 1566 - updated configs to correct values of step count and number of episodes --- .../obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml | 11 ++--------- 
.../config/obs_tests/main_config_NODE_LINK_TABLE.yaml | 10 ++-------- tests/config/obs_tests/main_config_NODE_STATUSES.yaml | 9 ++------- tests/config/obs_tests/main_config_without_obs.yaml | 10 ++-------- 4 files changed, 8 insertions(+), 32 deletions(-) diff --git a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml index bbdce9c1..2ac8f59a 100644 --- a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml +++ b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml @@ -23,17 +23,10 @@ agent_identifier: A2C # "ANY" node and acl actions action_type: ANY # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 1 # Number of time_steps for training per episode -num_train_steps: 256 - -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 - +num_train_steps: 5 observation_space: components: diff --git a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml index 41b3e588..a9986d5b 100644 --- a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml +++ b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml @@ -23,16 +23,10 @@ agent_identifier: RANDOM # "ANY" node and acl actions action_type: ANY # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 1 # Number of time_steps for training per episode -num_train_steps: 256 - -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 +num_train_steps: 5 observation_space: components: diff --git a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml index 34758199..a129712c 100644 --- a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml +++ 
b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml @@ -23,16 +23,11 @@ agent_identifier: RANDOM # "ANY" node and acl actions action_type: ANY # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 1 # Number of time_steps for training per episode -num_train_steps: 256 +num_train_steps: 5 -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 observation_space: components: diff --git a/tests/config/obs_tests/main_config_without_obs.yaml b/tests/config/obs_tests/main_config_without_obs.yaml index 352e765c..03d11b82 100644 --- a/tests/config/obs_tests/main_config_without_obs.yaml +++ b/tests/config/obs_tests/main_config_without_obs.yaml @@ -23,16 +23,10 @@ agent_identifier: RANDOM # "ANY" node and acl actions action_type: ANY # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 1 # Number of time_steps for training per episode -num_train_steps: 256 - -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 +num_train_steps: 5 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) From 921dc934c266ed9379e97702bd9019073aa15fd5 Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Mon, 10 Jul 2023 11:25:26 +0100 Subject: [PATCH 5/9] 1566 - added correct num_train_episodes etc values to configs, fixed test_reward.py --- src/primaite/agents/agent.py | 1 + tests/config/ppo_not_seeded_training_config.yaml | 10 ++++++++-- tests/config/ppo_seeded_training_config.yaml | 10 ++++++++-- ...le_action_space_fixed_blue_actions_main_config.yaml | 9 ++------- tests/config/single_action_space_lay_down_config.yaml | 8 -------- tests/config/test_random_red_main_config.yaml | 8 ++++---- tests/test_reward.py | 2 +- 7 files changed, 24 insertions(+), 24 
deletions(-) diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py index 883e844b..95a00f49 100644 --- a/src/primaite/agents/agent.py +++ b/src/primaite/agents/agent.py @@ -377,6 +377,7 @@ class HardCodedAgentSessionABC(AgentSessionABC): time.sleep(self._training_config.time_delay / 1000) obs = self._env.reset() self._env.close() + super().evaluate() @classmethod def load(cls): diff --git a/tests/config/ppo_not_seeded_training_config.yaml b/tests/config/ppo_not_seeded_training_config.yaml index 23cff44e..14b3f087 100644 --- a/tests/config/ppo_not_seeded_training_config.yaml +++ b/tests/config/ppo_not_seeded_training_config.yaml @@ -60,10 +60,16 @@ observation_space: # - name: NODE_STATUSES # - name: LINK_TRAFFIC_LEVELS # Number of episodes to run per session -num_episodes: 10 +num_train_episodes: 10 # Number of time_steps per episode -num_steps: 256 +num_train_steps: 256 + +# Number of episodes to run per session +num_eval_episodes: 10 + +# Number of time_steps per episode +num_eval_steps: 256 # Sets how often the agent will save a checkpoint (every n time episodes). # Set to 0 if no checkpoints are required. Default is 10 diff --git a/tests/config/ppo_seeded_training_config.yaml b/tests/config/ppo_seeded_training_config.yaml index 181331d9..a176c793 100644 --- a/tests/config/ppo_seeded_training_config.yaml +++ b/tests/config/ppo_seeded_training_config.yaml @@ -60,10 +60,16 @@ observation_space: # - name: NODE_STATUSES # - name: LINK_TRAFFIC_LEVELS # Number of episodes to run per session -num_episodes: 10 +num_train_episodes: 10 # Number of time_steps per episode -num_steps: 256 +num_train_steps: 256 + +# Number of episodes to run per session +num_eval_episodes: 1 + +# Number of time_steps per episode +num_eval_steps: 256 # Sets how often the agent will save a checkpoint (every n time episodes). # Set to 0 if no checkpoints are required. 
Default is 10 diff --git a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml index 859b2ab3..0f378634 100644 --- a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml +++ b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml @@ -23,16 +23,11 @@ agent_identifier: RANDOM # "ANY" node and acl actions action_type: ANY # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 1 # Number of time_steps for training per episode -num_train_steps: 256 +num_train_steps: 15 -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/single_action_space_lay_down_config.yaml b/tests/config/single_action_space_lay_down_config.yaml index c80c0bab..9d05b84a 100644 --- a/tests/config/single_action_space_lay_down_config.yaml +++ b/tests/config/single_action_space_lay_down_config.yaml @@ -32,14 +32,6 @@ - name: ftp port: '21' state: COMPROMISED -- item_type: POSITION - positions: - - node: '1' - x_pos: 309 - y_pos: 78 - - node: '2' - x_pos: 200 - y_pos: 78 - item_type: RED_IER id: '3' start_step: 2 diff --git a/tests/config/test_random_red_main_config.yaml b/tests/config/test_random_red_main_config.yaml index e0fc40ee..e2b24b41 100644 --- a/tests/config/test_random_red_main_config.yaml +++ b/tests/config/test_random_red_main_config.yaml @@ -29,16 +29,16 @@ random_red_agent: True # "ANY" node and acl actions action_type: NODE # Number of episodes for training to run per session -num_train_episodes: 10 +num_train_episodes: 2 # Number of time_steps for training per episode -num_train_steps: 256 +num_train_steps: 15 # Number of episodes for evaluation to run per session -num_eval_episodes: 10 
+num_eval_episodes: 2 # Number of time_steps for evaluation per episode -num_eval_steps: 256 +num_eval_steps: 15 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/test_reward.py b/tests/test_reward.py index d1b56671..bb6eb1b0 100644 --- a/tests/test_reward.py +++ b/tests/test_reward.py @@ -47,6 +47,6 @@ def test_rewards_are_being_penalised_at_each_step_function( Average Reward: -8 (-120 / 15) """ with temp_primaite_session as session: - session.close() + session.evaluate() ev_rewards = session.eval_av_reward_per_episode_csv() assert ev_rewards[1] == -8.0 From 563ff72fd646d66ae737977eafde381faf6e8f58 Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Mon, 10 Jul 2023 13:24:34 +0100 Subject: [PATCH 6/9] 1566 - fixed the test_training_config.py test file by removing num_steps from init --- src/primaite/config/training_config.py | 8 ++++---- tests/config/legacy_conversion/new_training_config.yaml | 5 ----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py index 5bbe881b..785d9757 100644 --- a/src/primaite/config/training_config.py +++ b/src/primaite/config/training_config.py @@ -300,7 +300,7 @@ def convert_legacy_training_config_dict( agent_framework: AgentFramework = AgentFramework.SB3, agent_identifier: AgentIdentifier = AgentIdentifier.PPO, action_type: ActionType = ActionType.ANY, - num_steps: int = 256, + num_train_steps: int = 256, ) -> Dict[str, Any]: """ Convert a legacy training config dict to the new format. @@ -312,15 +312,15 @@ def convert_legacy_training_config_dict( training configs don't have agent_identifier values. :param action_type: The action space type to set as legacy training configs don't have action_type values. - :param num_steps: The number of steps to set as legacy training configs - don't have num_steps values. 
+ :param num_train_steps: The number of steps to set as legacy training configs + don't have num_train_steps values. :return: The converted training config dict. """ config_dict = { "agent_framework": agent_framework.name, "agent_identifier": agent_identifier.name, "action_type": action_type.name, - "num_steps": num_steps, + "num_train_steps": num_train_steps, "sb3_output_verbose_level": SB3OutputVerboseLevel.INFO.name, } session_type_map = {"TRAINING": "TRAIN", "EVALUATION": "EVAL"} diff --git a/tests/config/legacy_conversion/new_training_config.yaml b/tests/config/legacy_conversion/new_training_config.yaml index 5ca80742..c57741f7 100644 --- a/tests/config/legacy_conversion/new_training_config.yaml +++ b/tests/config/legacy_conversion/new_training_config.yaml @@ -26,11 +26,6 @@ num_train_episodes: 10 # Number of time_steps for training per episode num_train_steps: 256 -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 10 # Type of session to be run (TRAINING or EVALUATION) From f3750032bea2a5c600559fa4910e3031c41fdfdd Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Tue, 11 Jul 2023 12:37:14 +0100 Subject: [PATCH 7/9] 1566 - applied pre-commit --- tests/test_train_eval_episode_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py index daa93055..b839e630 100644 --- a/tests/test_train_eval_episode_steps.py +++ b/tests/test_train_eval_episode_steps.py @@ -22,8 +22,8 @@ def test_eval_steps_differ_from_training(temp_primaite_session): num_eval_episodes = 1 """ expected_learning_metadata = {"total_episodes": 3, "total_time_steps": 75} - expected_evaluation_metadata = {"total_episodes": 1, "total_time_steps": 17} + with temp_primaite_session as session: # Run learning and check episode and step counts 
session.learn() From 585d35338f96c7354af96f140674cfc6d5faf9ac Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Tue, 11 Jul 2023 12:40:25 +0100 Subject: [PATCH 8/9] 1566 - updated docs for new items in training_config --- docs/source/config.rst | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index a28f0ec1..af590a24 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -83,13 +83,24 @@ The environment config file consists of the following attributes: The other configurable item is ``flatten`` which is false by default. When set to true, the observation space is flattened (turned into a 1-D vector). You should use this if your RL agent does not natively support observation space types like ``gym.Spaces.Tuple``. -* **num_episodes** [int] +* **num_train_episodes** [int] - This defines the number of episodes that the agent will train or be evaluated over. + This defines the number of episodes that the agent will train for. -* **num_steps** [int] - Determines the number of steps to run in each episode of the session +* **num_train_steps** [int] + + Determines the number of steps to run in each episode of the training session. + + +* **num_eval_episodes** [int] + + This defines the number of episodes that the agent will be evaluated over. + + +* **num_eval_steps** [int] + + Determines the number of steps to run in each episode of the evaluation session. 
* **time_delay** [int] From 96b48aad796b627034150f515a73bb27dd64d722 Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Wed, 12 Jul 2023 09:52:54 +0100 Subject: [PATCH 9/9] 1566 - removed redundant config file --- tests/config/training_config_main.yaml | 153 ------------------------- 1 file changed, 153 deletions(-) delete mode 100644 tests/config/training_config_main.yaml diff --git a/tests/config/training_config_main.yaml b/tests/config/training_config_main.yaml deleted file mode 100644 index 3351d66b..00000000 --- a/tests/config/training_config_main.yaml +++ /dev/null @@ -1,153 +0,0 @@ -# Training Config File - -# Sets which agent algorithm framework will be used. -# Options are: -# "SB3" (Stable Baselines3) -# "RLLIB" (Ray RLlib) -# "CUSTOM" (Custom Agent) -agent_framework: SB3 - -# Sets which deep learning framework will be used (by RLlib ONLY). -# Default is TF (Tensorflow). -# Options are: -# "TF" (Tensorflow) -# TF2 (Tensorflow 2.X) -# TORCH (PyTorch) -deep_learning_framework: TF2 - -# Sets which Agent class will be used. -# Options are: -# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) -# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) -# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) -# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) -# "RANDOM" (primaite.agents.simple.RandomAgent) -# "DUMMY" (primaite.agents.simple.DummyAgent) -agent_identifier: PPO - -# Sets whether Red Agent POL and IER is randomised. -# Options are: -# True -# False -random_red_agent: False - -# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
-# Options are: -# "BASIC" (The current observation space only) -# "FULL" (Full environment view with actions taken and reward feedback) -hard_coded_agent_view: FULL - -# Sets How the Action Space is defined: -# "NODE" -# "ACL" -# "ANY" node and acl actions -action_type: NODE -# observation space -observation_space: - # flatten: true - components: - - name: NODE_LINK_TABLE - # - name: NODE_STATUSES - # - name: LINK_TRAFFIC_LEVELS - - -# Number of episodes for training to run per session -num_train_episodes: 10 - -# Number of time_steps for training per episode -num_train_steps: 256 - -# Number of episodes for evaluation to run per session -num_eval_episodes: 10 - -# Number of time_steps for evaluation per episode -num_eval_steps: 256 - -# Sets how often the agent will save a checkpoint (every n time episodes). -# Set to 0 if no checkpoints are required. Default is 10 -checkpoint_every_n_episodes: 10 - -# Time delay (milliseconds) between steps for CUSTOM agents. -time_delay: 5 - -# Type of session to be run. 
Options are: -# "TRAIN" (Trains an agent) -# "EVAL" (Evaluates an agent) -# "TRAIN_EVAL" (Trains then evaluates an agent) -session_type: TRAIN - -# Environment config values -# The high value for the observation space -observation_space_high_value: 1000000000 - -# The Stable Baselines3 learn/eval output verbosity level: -# Options are: -# "NONE" (No Output) -# "INFO" (Info Messages (such as devices and wrappers used)) -# "DEBUG" (All Messages) -sb3_output_verbose_level: NONE - -# Reward values -# Generic -all_ok: 0 -# Node Hardware State -off_should_be_on: -10 -off_should_be_resetting: -5 -on_should_be_off: -2 -on_should_be_resetting: -5 -resetting_should_be_on: -5 -resetting_should_be_off: -2 -resetting: -3 -# Node Software or Service State -good_should_be_patching: 2 -good_should_be_compromised: 5 -good_should_be_overwhelmed: 5 -patching_should_be_good: -5 -patching_should_be_compromised: 2 -patching_should_be_overwhelmed: 2 -patching: -3 -compromised_should_be_good: -20 -compromised_should_be_patching: -20 -compromised_should_be_overwhelmed: -20 -compromised: -20 -overwhelmed_should_be_good: -20 -overwhelmed_should_be_patching: -20 -overwhelmed_should_be_compromised: -20 -overwhelmed: -20 -# Node File System State -good_should_be_repairing: 2 -good_should_be_restoring: 2 -good_should_be_corrupt: 5 -good_should_be_destroyed: 10 -repairing_should_be_good: -5 -repairing_should_be_restoring: 2 -repairing_should_be_corrupt: 2 -repairing_should_be_destroyed: 0 -repairing: -3 -restoring_should_be_good: -10 -restoring_should_be_repairing: -2 -restoring_should_be_corrupt: 1 -restoring_should_be_destroyed: 2 -restoring: -6 -corrupt_should_be_good: -10 -corrupt_should_be_repairing: -10 -corrupt_should_be_restoring: -10 -corrupt_should_be_destroyed: 2 -corrupt: -10 -destroyed_should_be_good: -20 -destroyed_should_be_repairing: -20 -destroyed_should_be_restoring: -20 -destroyed_should_be_corrupt: -20 -destroyed: -20 -scanning: -2 -# IER status -red_ier_running: -5 
-green_ier_blocked: -10 - -# Patching / Reset durations -os_patching_duration: 5 # The time taken to patch the OS -node_reset_duration: 5 # The time taken to reset a node (hardware) -service_patching_duration: 5 # The time taken to patch a service -file_system_repairing_limit: 5 # The time take to repair the file system -file_system_restoring_limit: 5 # The time take to restore the file system -file_system_scanning_limit: 5 # The time taken to scan the file system