# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
# Training Config File

# Sets which agent algorithm framework will be used.
# Options are:
# "SB3" (Stable Baselines3)
# "RLLIB" (Ray RLlib)
# "CUSTOM" (Custom Agent)
agent_framework: SB3

# Sets which deep learning framework will be used (by RLlib ONLY).
# Default is TF (Tensorflow).
# Options are:
# "TF" (Tensorflow)
# "TF2" (Tensorflow 2.X)
# "TORCH" (PyTorch)
deep_learning_framework: TF2

# Sets which Agent class will be used.
# Options are:
# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
# "RANDOM" (primaite.agents.simple.RandomAgent)
# "DUMMY" (primaite.agents.simple.DummyAgent)
agent_identifier: PPO

# Sets whether Red Agent POL and IER is randomised.
# Options are:
# true
# false
random_red_agent: false

# The (integer) seed to be used in random number generation.
# Default is null (no seed).
# NOTE: write `null`, not `None` - a bare `None` is not a YAML null and is
# loaded as the string "None".
seed: null

# Set whether the agent evaluation will be deterministic instead of stochastic.
# Options are:
# true
# false
deterministic: false

# Sets what view of the environment the deterministic hardcoded agent has.
# The default is BASIC.
# Options are:
# "BASIC" (The current observation space only)
# "FULL" (Full environment view with actions taken and reward feedback)
hard_coded_agent_view: FULL

# Sets how the Action Space is defined:
# "NODE"
# "ACL"
# "ANY" (node and acl actions)
action_type: NODE

# Observation space.
# Only the uncommented components below are active; the commented-out entries
# are kept as ready-to-enable alternatives.
observation_space:
  components:
    - name: NODE_LINK_TABLE
    # - name: NODE_STATUSES
    # - name: LINK_TRAFFIC_LEVELS
    # - name: ACCESS_CONTROL_LIST

# Number of training episodes to run per session
num_train_episodes: 10

# Number of time_steps per training episode
num_train_steps: 256

# Number of evaluation episodes to run per session
num_eval_episodes: 10

# Number of time_steps per evaluation episode
num_eval_steps: 256

# Sets how often the agent will save a checkpoint (every n episodes).
# Set to 0 if no checkpoints are required. Default is 10
checkpoint_every_n_episodes: 0

# Time delay (milliseconds) between steps for CUSTOM agents.
time_delay: 5

# Type of session to be run. Options are:
# "TRAIN" (Trains an agent)
# "EVAL" (Evaluates an agent)
# "TRAIN_EVAL" (Trains then evaluates an agent)
session_type: TRAIN_EVAL

# Environment config values
# The high value for the observation space
observation_space_high_value: 1000000000

# The Stable Baselines3 learn/eval output verbosity level:
# Options are:
# "NONE" (No Output)
# "INFO" (Info Messages (such as devices and wrappers used))
# "DEBUG" (All Messages)
sb3_output_verbose_level: NONE

# Reward values
# Generic
all_ok: 0.0000

# Node Hardware State
off_should_be_on: -0.001
off_should_be_resetting: -0.0005
on_should_be_off: -0.0002
on_should_be_resetting: -0.0005
resetting_should_be_on: -0.0005
resetting_should_be_off: -0.0002
resetting: -0.0003

# Node Software or Service State
good_should_be_patching: 0.0002
good_should_be_compromised: 0.0005
good_should_be_overwhelmed: 0.0005
patching_should_be_good: -0.0005
patching_should_be_compromised: 0.0002
patching_should_be_overwhelmed: 0.0002
patching: -0.0003
compromised_should_be_good: -0.002
compromised_should_be_patching: -0.002
compromised_should_be_overwhelmed: -0.002
compromised: -0.002
overwhelmed_should_be_good: -0.002
overwhelmed_should_be_patching: -0.002
overwhelmed_should_be_compromised: -0.002
overwhelmed: -0.002

# Node File System State
good_should_be_repairing: 0.0002
good_should_be_restoring: 0.0002
good_should_be_corrupt: 0.0005
good_should_be_destroyed: 0.001
repairing_should_be_good: -0.0005
repairing_should_be_restoring: 0.0002
repairing_should_be_corrupt: 0.0002
repairing_should_be_destroyed: 0.0000
repairing: -0.0003
restoring_should_be_good: -0.001
restoring_should_be_repairing: -0.0002
restoring_should_be_corrupt: 0.0001
restoring_should_be_destroyed: 0.0002
restoring: -0.0006
corrupt_should_be_good: -0.001
corrupt_should_be_repairing: -0.001
corrupt_should_be_restoring: -0.001
corrupt_should_be_destroyed: 0.0002
corrupt: -0.001
destroyed_should_be_good: -0.002
destroyed_should_be_repairing: -0.002
destroyed_should_be_restoring: -0.002
destroyed_should_be_corrupt: -0.002
destroyed: -0.002
scanning: -0.0002

# IER status
red_ier_running: -0.0005
green_ier_blocked: -0.001

# Patching / Reset durations
os_patching_duration: 5  # The time taken to patch the OS
node_reset_duration: 5  # The time taken to reset a node (hardware)
service_patching_duration: 5  # The time taken to patch a service
file_system_repairing_limit: 5  # The time taken to repair the file system
file_system_restoring_limit: 5  # The time taken to restore the file system
file_system_scanning_limit: 5  # The time taken to scan the file system