# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
# Training Config File
#
# Flat mapping of training/evaluation session settings, reward values and
# patching/reset durations. Every setting is a top-level key except
# `observation_space`, which is a nested mapping.

# Sets which agent algorithm framework will be used.
# Options are:
# "SB3" (Stable Baselines3)
# "RLLIB" (Ray RLlib)
# "CUSTOM" (Custom Agent)
agent_framework: SB3

# Sets which deep learning framework will be used (by RLlib ONLY).
# Default is TF (Tensorflow).
# Options are:
# "TF" (Tensorflow)
# "TF2" (Tensorflow 2.X)
# "TORCH" (PyTorch)
deep_learning_framework: TF2

# Sets which Agent class will be used.
# Options are:
# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
# "RANDOM" (primaite.agents.simple.RandomAgent)
# "DUMMY" (primaite.agents.simple.DummyAgent)
agent_identifier: PPO

# Sets whether Red Agent POL and IER is randomised.
# Options are:
# True
# False
random_red_agent: False

# Sets what view of the environment the deterministic hardcoded agent has.
# The default is BASIC.
# Options are:
# "BASIC" (The current observation space only)
# "FULL" (Full environment view with actions taken and reward feedback)
hard_coded_agent_view: FULL

# Sets how the Action Space is defined:
# "NODE"
# "ACL"
# "ANY" node and acl actions
action_type: NODE

# Observation space.
# Commented-out entries below are optional settings/components that are
# currently disabled; only NODE_LINK_TABLE is active.
observation_space:
  # flatten: true
  components:
    - name: NODE_LINK_TABLE
    # - name: NODE_STATUSES
    # - name: LINK_TRAFFIC_LEVELS

# Number of episodes for training to run per session
num_train_episodes: 3

# Number of time_steps for training per episode
num_train_steps: 25

# Number of episodes for evaluation to run per session
num_eval_episodes: 1

# Number of time_steps for evaluation per episode
num_eval_steps: 17

# Sets how often the agent will save a checkpoint (every n episodes).
# Set to 0 if no checkpoints are required. Default is 10
checkpoint_every_n_episodes: 0

# Time delay (milliseconds) between steps for CUSTOM agents.
time_delay: 5

# Type of session to be run. Options are:
# "TRAIN" (Trains an agent)
# "EVAL" (Evaluates an agent)
# "TRAIN_EVAL" (Trains then evaluates an agent)
session_type: TRAIN_EVAL

# Environment config values
# The high value for the observation space
observation_space_high_value: 1000000000

# The Stable Baselines3 learn/eval output verbosity level:
# Options are:
# "NONE" (No Output)
# "INFO" (Info Messages (such as devices and wrappers used))
# "DEBUG" (All Messages)
sb3_output_verbose_level: NONE

# Reward values
# Generic
all_ok: 0

# Node Hardware State
off_should_be_on: -10
off_should_be_resetting: -5
on_should_be_off: -2
on_should_be_resetting: -5
resetting_should_be_on: -5
resetting_should_be_off: -2
resetting: -3

# Node Software or Service State
good_should_be_patching: 2
good_should_be_compromised: 5
good_should_be_overwhelmed: 5
patching_should_be_good: -5
patching_should_be_compromised: 2
patching_should_be_overwhelmed: 2
patching: -3
compromised_should_be_good: -20
compromised_should_be_patching: -20
compromised_should_be_overwhelmed: -20
compromised: -20
overwhelmed_should_be_good: -20
overwhelmed_should_be_patching: -20
overwhelmed_should_be_compromised: -20
overwhelmed: -20

# Node File System State
good_should_be_repairing: 2
good_should_be_restoring: 2
good_should_be_corrupt: 5
good_should_be_destroyed: 10
repairing_should_be_good: -5
repairing_should_be_restoring: 2
repairing_should_be_corrupt: 2
repairing_should_be_destroyed: 0
repairing: -3
restoring_should_be_good: -10
restoring_should_be_repairing: -2
restoring_should_be_corrupt: 1
restoring_should_be_destroyed: 2
restoring: -6
corrupt_should_be_good: -10
corrupt_should_be_repairing: -10
corrupt_should_be_restoring: -10
corrupt_should_be_destroyed: 2
corrupt: -10
destroyed_should_be_good: -20
destroyed_should_be_repairing: -20
destroyed_should_be_restoring: -20
destroyed_should_be_corrupt: -20
destroyed: -20
scanning: -2

# IER status
red_ier_running: -5
green_ier_blocked: -10

# Patching / Reset durations
os_patching_duration: 5  # The time taken to patch the OS
node_reset_duration: 5  # The time taken to reset a node (hardware)
service_patching_duration: 5  # The time taken to patch a service
file_system_repairing_limit: 5  # The time taken to repair the file system
file_system_restoring_limit: 5  # The time taken to restore the file system
file_system_scanning_limit: 5  # The time taken to scan the file system