diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py index 685fe776..4eb398b4 100644 --- a/src/primaite/agents/agent.py +++ b/src/primaite/agents/agent.py @@ -348,8 +348,8 @@ class HardCodedAgentSessionABC(AgentSessionABC): self._env.set_as_eval() # noqa self.is_eval = True - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_eval_steps + episodes = self._training_config.num_eval_episodes obs = self._env.reset() for episode in range(episodes): diff --git a/src/primaite/agents/rllib.py b/src/primaite/agents/rllib.py index d851ba9c..443598e7 100644 --- a/src/primaite/agents/rllib.py +++ b/src/primaite/agents/rllib.py @@ -107,13 +107,13 @@ class RLlibAgent(AgentSessionABC): ), ) - self._agent_config.training(train_batch_size=self._training_config.num_steps) + self._agent_config.training(train_batch_size=self._training_config.num_train_steps) self._agent_config.framework(framework="tf") self._agent_config.rollouts( num_rollout_workers=1, num_envs_per_worker=1, - horizon=self._training_config.num_steps, + horizon=self._training_config.num_train_steps, ) self._agent: Algorithm = self._agent_config.build(logger_creator=_custom_log_creator(self.learning_path)) @@ -121,7 +121,7 @@ class RLlibAgent(AgentSessionABC): checkpoint_n = self._training_config.checkpoint_every_n_episodes episode_count = self._current_result["episodes_total"] if checkpoint_n > 0 and episode_count > 0: - if (episode_count % checkpoint_n == 0) or (episode_count == self._training_config.num_episodes): + if (episode_count % checkpoint_n == 0) or (episode_count == self._training_config.num_train_episodes): self._agent.save(str(self.checkpoints_path)) def learn( @@ -133,8 +133,8 @@ class RLlibAgent(AgentSessionABC): :param kwargs: Any agent-specific key-word args to be passed. 
""" - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_train_steps + episodes = self._training_config.num_train_episodes _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...") for i in range(episodes): diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py index f5ac44cb..17827ff4 100644 --- a/src/primaite/agents/sb3.py +++ b/src/primaite/agents/sb3.py @@ -53,11 +53,12 @@ class SB3Agent(AgentSessionABC): session_path=self.session_path, timestamp_str=self.timestamp_str, ) + self._agent = self._agent_class( PPOMlp, self._env, verbose=self.sb3_output_verbose_level, - n_steps=self._training_config.num_steps, + n_steps=self._training_config.num_eval_steps, tensorboard_log=str(self._tensorboard_log_path), ) @@ -82,8 +83,8 @@ class SB3Agent(AgentSessionABC): :param kwargs: Any agent-specific key-word args to be passed. """ - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_train_steps + episodes = self._training_config.num_train_episodes self.is_eval = False _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...") for i in range(episodes): @@ -104,8 +105,8 @@ class SB3Agent(AgentSessionABC): :param deterministic: Whether the evaluation is deterministic. :param kwargs: Any agent-specific key-word args to be passed. 
""" - time_steps = self._training_config.num_steps - episodes = self._training_config.num_episodes + time_steps = self._training_config.num_eval_steps + episodes = self._training_config.num_eval_episodes self._env.set_as_eval() self.is_eval = True if deterministic: diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py index bd73f65b..018fd982 100644 --- a/src/primaite/config/training_config.py +++ b/src/primaite/config/training_config.py @@ -60,11 +60,17 @@ class TrainingConfig: action_type: ActionType = ActionType.ANY "The ActionType to use" - num_episodes: int = 10 - "The number of episodes to train over" + num_train_episodes: int = 10 + "The number of episodes to train over during an training session" - num_steps: int = 256 - "The number of steps in an episode" + num_train_steps: int = 256 + "The number of steps in an episode during an training session" + + num_eval_episodes: int = 10 + "The number of episodes to train over during an evaluation session" + + num_eval_steps: int = 256 + "The number of steps in an episode during an evaluation session" checkpoint_every_n_episodes: int = 5 "The agent will save a checkpoint every n episodes" @@ -230,8 +236,17 @@ class TrainingConfig: tc += f"{self.hard_coded_agent_view}, " tc += f"{self.action_type}, " tc += f"observation_space={self.observation_space}, " - tc += f"{self.num_episodes} episodes @ " - tc += f"{self.num_steps} steps" + if self.session_type.name == "TRAIN": + tc += f"{self.num_train_episodes} episodes @ " + tc += f"{self.num_train_steps} steps" + elif self.session_type.name == "EVAL": + tc += f"{self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" + else: + tc += f"Training: {self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" + tc += f"Evaluation: {self.num_eval_episodes} episodes @ " + tc += f"{self.num_eval_steps} steps" return tc @@ -320,7 +335,8 @@ def _get_new_key_from_legacy(legacy_key: str) -> str: """ 
key_mapping = { "agentIdentifier": None, - "numEpisodes": "num_episodes", + "numEpisodes": "num_train_episodes", + "numSteps": "num_train_steps", "timeDelay": "time_delay", "configFilename": None, "sessionType": "session_type", diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index 03c23f93..c7e67e34 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -85,7 +85,12 @@ class Primaite(Env): _LOGGER.info(f"Using: {str(self.training_config)}") # Number of steps in an episode - self.episode_steps = self.training_config.num_steps + if self.training_config.session_type == SessionType.TRAIN: + self.episode_steps = self.training_config.num_train_steps + elif self.training_config.session_type == SessionType.EVAL: + self.episode_steps = self.training_config.num_eval_steps + else: + self.episode_steps = self.training_config.num_train_steps super(Primaite, self).__init__() @@ -254,6 +259,7 @@ class Primaite(Env): self.episode_count = 0 self.step_count = 0 self.total_step_count = 0 + self.episode_steps = self.training_config.num_eval_steps def reset(self): """ diff --git a/tests/config/legacy_conversion/new_training_config.yaml b/tests/config/legacy_conversion/new_training_config.yaml index 49e6a00b..5ca80742 100644 --- a/tests/config/legacy_conversion/new_training_config.yaml +++ b/tests/config/legacy_conversion/new_training_config.yaml @@ -20,10 +20,17 @@ agent_identifier: PPO # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 10 -# Number of time_steps per episode -num_steps: 256 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic 
agents) time_delay: 10 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml index d26d7955..bbdce9c1 100644 --- a/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml +++ b/tests/config/obs_tests/main_config_LINK_TRAFFIC_LEVELS.yaml @@ -22,10 +22,17 @@ agent_identifier: A2C # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: diff --git a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml index aae740b6..41b3e588 100644 --- a/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml +++ b/tests/config/obs_tests/main_config_NODE_LINK_TABLE.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: components: diff --git a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml index 4066eace..34758199 100644 --- a/tests/config/obs_tests/main_config_NODE_STATUSES.yaml +++ 
b/tests/config/obs_tests/main_config_NODE_STATUSES.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 observation_space: components: diff --git a/tests/config/obs_tests/main_config_without_obs.yaml b/tests/config/obs_tests/main_config_without_obs.yaml index 08452dda..352e765c 100644 --- a/tests/config/obs_tests/main_config_without_obs.yaml +++ b/tests/config/obs_tests/main_config_without_obs.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/one_node_states_on_off_main_config.yaml b/tests/config/one_node_states_on_off_main_config.yaml index 7f1ced01..63fdd1a5 100644 --- a/tests/config/one_node_states_on_off_main_config.yaml +++ b/tests/config/one_node_states_on_off_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: DUMMY # "ACL" # "ANY" node and acl actions action_type: NODE -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for 
training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml index 97d0ddaf..859b2ab3 100644 --- a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml +++ b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/single_action_space_main_config.yaml b/tests/config/single_action_space_main_config.yaml index 067b9a6d..c875757f 100644 --- a/tests/config/single_action_space_main_config.yaml +++ b/tests/config/single_action_space_main_config.yaml @@ -22,10 +22,17 @@ agent_identifier: RANDOM # "ACL" # "ANY" node and acl actions action_type: ANY -# Number of episodes to run per session -num_episodes: 1 -# Number of time_steps per episode -num_steps: 5 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session 
+num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 # Type of session to be run (TRAINING or EVALUATION) diff --git a/tests/config/test_random_red_main_config.yaml b/tests/config/test_random_red_main_config.yaml index 800fe808..e0fc40ee 100644 --- a/tests/config/test_random_red_main_config.yaml +++ b/tests/config/test_random_red_main_config.yaml @@ -28,10 +28,17 @@ random_red_agent: True # "ACL" # "ANY" node and acl actions action_type: NODE -# Number of episodes to run per session -num_episodes: 2 -# Number of time_steps per episode -num_steps: 15 +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 # Time delay between steps (for generic agents) time_delay: 1 diff --git a/tests/config/train_episode_step.yaml b/tests/config/train_episode_step.yaml new file mode 100644 index 00000000..550b95fd --- /dev/null +++ b/tests/config/train_episode_step.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. 
+# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. +# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 30 + +# Number of time_steps for training per episode +num_train_steps: 1 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 10 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: EVAL + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/config/train_eval_check_episode_step.yaml b/tests/config/train_eval_check_episode_step.yaml new file mode 100644 index 00000000..f616116e --- /dev/null +++ b/tests/config/train_eval_check_episode_step.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. +# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
+# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 30 + +# Number of time_steps for training per episode +num_train_steps: 1 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 10 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: TRAIN + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/config/training_config_main.yaml b/tests/config/training_config_main.yaml new file mode 100644 index 00000000..3351d66b --- /dev/null +++ b/tests/config/training_config_main.yaml @@ -0,0 +1,153 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. +# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. +# Options are: +# True +# False +random_red_agent: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
+# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS + + +# Number of episodes for training to run per session +num_train_episodes: 10 + +# Number of time_steps for training per episode +num_train_steps: 256 + +# Number of episodes for evaluation to run per session +num_eval_episodes: 10 + +# Number of time_steps for evaluation per episode +num_eval_steps: 256 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 10 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: TRAIN + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/conftest.py b/tests/conftest.py index af76b314..7e06bea7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -163,8 +163,9 @@ def run_generic(env, config_values): """Run against a generic agent.""" # Reset the environment at the start of the episode # env.reset() - for episode in range(0, config_values.num_episodes): - for step in range(0, config_values.num_steps): + print(config_values.num_train_episodes, "how many episodes") + for episode in range(0, config_values.num_train_episodes): + for step in range(0, config_values.num_train_steps): # Send the observation space to the agent to get an action # TEMP - random action for now # action = env.blue_agent_action(obs) diff --git a/tests/test_single_action_space.py b/tests/test_single_action_space.py index 5d55b9c9..bfcffd42 100644 --- a/tests/test_single_action_space.py +++ b/tests/test_single_action_space.py @@ -12,8 +12,8 @@ def run_generic_set_actions(env: Primaite): # Reset the environment at the start of the episode # env.reset() training_config = env.training_config - for episode in range(0, training_config.num_episodes): - for step in range(0, training_config.num_steps): + for episode in range(0, training_config.num_train_episodes): + for step in range(0, training_config.num_train_steps): # Send the observation space to the agent to get an action # TEMP - random action for now # action = env.blue_agent_action(obs) diff --git a/tests/test_train_eval_episode_steps.py b/tests/test_train_eval_episode_steps.py new file mode 100644 
index 00000000..fad30f1b --- /dev/null +++ b/tests/test_train_eval_episode_steps.py @@ -0,0 +1,46 @@ +import pytest + +from primaite import getLogger +from primaite.config.lay_down_config import dos_very_basic_config_path +from tests import TEST_CONFIG_ROOT +from tests.conftest import run_generic + +_LOGGER = getLogger(__name__) + + +@pytest.mark.parametrize( + "temp_primaite_session", + [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]], + indirect=True, +) +def test_eval_steps_differ_from_training(temp_primaite_session): + """Uses PrimaiteSession class to compare the number of steps used for training and evaluation.""" + with temp_primaite_session as train_session: + env = train_session.env + train_session.learn() + + """ + Train_episode_step.yaml main config: + num_train_steps = 1 + num_eval_steps = 10 + + When the YAML file changes from TRAIN to EVAL the episode step changes and uses the other config value. + + The test is showing that 10 steps are running for evaluation and NOT 1 step as EVAL has been selected in the config. + """ + assert env.episode_steps == 10 + # assert env.actual_episode_count == 10 # should be 10 + + +@pytest.mark.parametrize( + "temp_primaite_session", + [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]], + indirect=True, +) +def test_train_eval_config_option(temp_primaite_session): + """Uses PrimaiteSession class to test number of episodes and steps used for TRAIN and EVAL option.""" + with temp_primaite_session as train_session: + env = train_session.env + run_generic(env, env.training_config) + + print(env.actual_episode_count, env.step_count, env.total_step_count)