# PrimAITE/tests/config/train_episode_step.yaml

# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
# Training Config File

# Agent algorithm framework used for the session.
# Accepted values:
#   'SB3'    - Stable Baselines3
#   'RLLIB'  - Ray RLlib
#   'CUSTOM' - Custom Agent
agent_framework: 'SB3'

# Deep learning framework to use (consulted by RLlib ONLY).
# Default is 'TF' (Tensorflow).
# Accepted values:
#   'TF'    - Tensorflow
#   'TF2'   - Tensorflow 2.X
#   'TORCH' - PyTorch
deep_learning_framework: 'TF2'

# Agent class used for the session.
# Accepted values:
#   'A2C'        - Advantage Actor Critic (coupled with either the SB3 or RLLIB agent_framework)
#   'PPO'        - Proximal Policy Optimization (coupled with either the SB3 or RLLIB agent_framework)
#   'HARDCODED'  - the HardCoded agents (coupled with an ACL or NODE action_type)
#   'DO_NOTHING' - the DoNothing agents (coupled with an ACL or NODE action_type)
#   'RANDOM'     - primaite.agents.simple.RandomAgent
#   'DUMMY'      - primaite.agents.simple.DummyAgent
agent_identifier: 'PPO'

# Sets whether the Red Agent POL and IER are randomised.
# Options are (canonical lowercase YAML booleans):
# true
# false
random_red_agent: false

# View of the environment given to the deterministic hardcoded agent.
# Default is 'BASIC'.
# Accepted values:
#   'BASIC' - the current observation space only
#   'FULL'  - full environment view with actions taken and reward feedback
hard_coded_agent_view: 'FULL'

# How the action space is defined.
# Accepted values:
#   'NODE'
#   'ACL'
#   'ANY' - node and acl actions
action_type: 'NODE'
# Observation space definition.
# Only the entries listed under `components` are active; the commented-out
# lines below are alternatives that are currently disabled.
observation_space:
  # flatten: true
  components:
    - name: 'NODE_LINK_TABLE'
    # - name: NODE_STATUSES
    # - name: LINK_TRAFFIC_LEVELS


# Session sizing: training and evaluation each have their own episode count
# (per session) and time-step count (per episode).

# Number of episodes for training to run per session.
num_train_episodes: 3

# Number of time steps for training per episode.
num_train_steps: 25

# Number of episodes for evaluation to run per session.
num_eval_episodes: 1

# Number of time steps for evaluation per episode.
num_eval_steps: 17

# Sets how often the agent will save a checkpoint (every n episodes).
# Set to 0 if no checkpoints are required. Default is 10.
checkpoint_every_n_episodes: 0

# Time delay, in milliseconds, between steps for CUSTOM agents.
time_delay: 5

# Kind of session to run.
# Accepted values:
#   'TRAIN'      - trains an agent
#   'EVAL'       - evaluates an agent
#   'TRAIN_EVAL' - trains then evaluates an agent
session_type: 'TRAIN_EVAL'

# Environment config values
# The high value for the observation space (1,000,000,000 here).
# NOTE(review): presumably the upper bound of each observation entry - confirm against the environment code.
observation_space_high_value: 1000000000

# Verbosity of the Stable Baselines3 learn/eval output.
# Accepted values:
#   'NONE'  - no output
#   'INFO'  - info messages (such as devices and wrappers used)
#   'DEBUG' - all messages
sb3_output_verbose_level: 'NONE'

# Reward values
# NOTE(review): keys of the form <a>_should_be_<b> presumably mean "the actual
# state is <a> while the expected state is <b>" - confirm against the reward
# function before relying on this reading.
# Generic
all_ok: 0
# Node Hardware State
off_should_be_on: -10
off_should_be_resetting: -5
on_should_be_off: -2
on_should_be_resetting: -5
resetting_should_be_on: -5
resetting_should_be_off: -2
resetting: -3
# Node Software or Service State
# Reward values for the states good, patching, compromised and overwhelmed.
# Every compromised_* and overwhelmed_* entry in this section is set to -20.
good_should_be_patching: 2
good_should_be_compromised: 5
good_should_be_overwhelmed: 5
patching_should_be_good: -5
patching_should_be_compromised: 2
patching_should_be_overwhelmed: 2
patching: -3
compromised_should_be_good: -20
compromised_should_be_patching: -20
compromised_should_be_overwhelmed: -20
compromised: -20
overwhelmed_should_be_good: -20
overwhelmed_should_be_patching: -20
overwhelmed_should_be_compromised: -20
overwhelmed: -20
# Node File System State
# Reward values for the states good, repairing, restoring, corrupt and
# destroyed, plus a separate value for scanning.
good_should_be_repairing: 2
good_should_be_restoring: 2
good_should_be_corrupt: 5
good_should_be_destroyed: 10
repairing_should_be_good: -5
repairing_should_be_restoring: 2
repairing_should_be_corrupt: 2
repairing_should_be_destroyed: 0
repairing: -3
restoring_should_be_good: -10
restoring_should_be_repairing: -2
restoring_should_be_corrupt: 1
restoring_should_be_destroyed: 2
restoring: -6
corrupt_should_be_good: -10
corrupt_should_be_repairing: -10
corrupt_should_be_restoring: -10
corrupt_should_be_destroyed: 2
corrupt: -10
destroyed_should_be_good: -20
destroyed_should_be_repairing: -20
destroyed_should_be_restoring: -20
destroyed_should_be_corrupt: -20
destroyed: -20
scanning: -2
# IER status
# NOTE(review): presumably the rewards applied while a red IER is running and
# while a green IER is blocked - confirm against the reward function.
red_ier_running: -5
green_ier_blocked: -10

# Patching / Reset durations
# NOTE(review): units are not stated here; presumably simulation time steps - confirm.
os_patching_duration: 5            # The time taken to patch the OS
node_reset_duration: 5             # The time taken to reset a node (hardware)
service_patching_duration: 5       # The time taken to patch a service
file_system_repairing_limit: 5      # The time taken to repair the file system
file_system_restoring_limit: 5      # The time taken to restore the file system
file_system_scanning_limit: 5       # The time taken to scan the file system
# Change history recovered from trailing blame-annotation residue. The
# annotated lines were a duplicate of the configuration above and were not
# valid YAML, so only the commit records are kept (messages quoted verbatim):
#
#   2023-07-21 14:54:09 +01:00
#     #1648 - Updated file header from 'Crown Owned Copyright (C) Dstl 2023.
#     DEFCON 703. Shared in confidence.' to '© Crown-owned copyright 2023,
#     Defence Science and Technology Laboratory UK'
#
#   2023-07-07 15:50:14 +01:00
#     #1566 - Refactored the test_train_eval_episode_steps.py to sue
#     TempPrimaiteSession. - Fixed all errors that were caused b fixing the
#     above. - Some tests still fail, these are for SS to fix. - Dropped the
#     old run_generic stuff from conftest.py
#     (annotated lines: num_train_episodes, num_train_steps, num_eval_episodes,
#     num_eval_steps, checkpoint_every_n_episodes, session_type)
#
#   2023-07-07 14:13:47 +01:00
#     1566 - added test file and edited configs to include types of num steps
#     and modifed agents to use correct step and episode counts