From db27bea4ec37dd2d4ad7082ad951a36881168144 Mon Sep 17 00:00:00 2001 From: Charlie Crane Date: Tue, 25 Jun 2024 12:29:01 +0100 Subject: [PATCH 1/5] #2656 - Committing current state before lunch. New ActionPenalty reward added. Basic implementation returns a -1 reward if last_action_response.action isn't DONOTHING. Minor change in data_manipulation so I can see it working in the data_manipulation notebook. Need to use configured values but so far, promising?. Looks to result in a better average reward than without which is good, I think. --- .../_package_data/data_manipulation.yaml | 1 - src/primaite/game/agent/rewards.py | 33 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/primaite/config/_package_data/data_manipulation.yaml b/src/primaite/config/_package_data/data_manipulation.yaml index 6cded5f2..1ec98f39 100644 --- a/src/primaite/config/_package_data/data_manipulation.yaml +++ b/src/primaite/config/_package_data/data_manipulation.yaml @@ -740,7 +740,6 @@ agents: agent_name: client_2_green_user - agent_settings: flatten_obs: true diff --git a/src/primaite/game/agent/rewards.py b/src/primaite/game/agent/rewards.py index cabea5f4..7d14e097 100644 --- a/src/primaite/game/agent/rewards.py +++ b/src/primaite/game/agent/rewards.py @@ -360,6 +360,38 @@ class SharedReward(AbstractReward): return cls(agent_name=agent_name) +class ActionPenalty(AbstractReward): + """ + Apply a negative reward when taking any action except DONOTHING. + + Optional Configuration item therefore default value of 0 (?). + """ + + def __init__(self, agent_name: str, penalty: float = 0): + """ + Initialise the reward. + + Penalty will default to 0, as this is an optional param. 
+ """ + self.agent_name = agent_name + self.penalty = penalty + + def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float: + """Calculate the penalty to be applied.""" + if last_action_response.action == "DONOTHING": + # No penalty for doing nothing at present + return 0 + else: + return -1 + + @classmethod + def from_config(cls, config: Dict) -> "ActionPenalty": + """Build the ActionPenalty object from config.""" + agent_name = config.get("agent_name") + # penalty_value = config.get("ACTION_PENALTY", 0) + return cls(agent_name=agent_name) + + class RewardFunction: """Manages the reward function for the agent.""" @@ -370,6 +402,7 @@ class RewardFunction: "WEBPAGE_UNAVAILABLE_PENALTY": WebpageUnavailablePenalty, "GREEN_ADMIN_DATABASE_UNREACHABLE_PENALTY": GreenAdminDatabaseUnreachablePenalty, "SHARED_REWARD": SharedReward, + "ACTION_PENALTY": ActionPenalty, } """List of reward class identifiers.""" From 5ad16fdb7eecfd3d0e9f8e6349a0d542066405a7 Mon Sep 17 00:00:00 2001 From: Charlie Crane Date: Tue, 25 Jun 2024 15:36:47 +0100 Subject: [PATCH 2/5] #2656 - Corrected from_config() for ActionPenalty so that it can pull the negative reward value from YAML and apply, defaulting to 0 still if not found/not configured. Currently prints to terminal when a negative reward is being applied, though this is for implementation and troubleshooting.
To be removed before PR is pushed out of draft --- .../config/_package_data/data_manipulation.yaml | 5 +++++ src/primaite/game/agent/rewards.py | 11 +++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/primaite/config/_package_data/data_manipulation.yaml b/src/primaite/config/_package_data/data_manipulation.yaml index 1ec98f39..be613918 100644 --- a/src/primaite/config/_package_data/data_manipulation.yaml +++ b/src/primaite/config/_package_data/data_manipulation.yaml @@ -739,6 +739,11 @@ agents: options: agent_name: client_2_green_user + - type: ACTION_PENALTY + weight: 1.0 + options: + agent_name: defender + penalty_value: -1 agent_settings: flatten_obs: true diff --git a/src/primaite/game/agent/rewards.py b/src/primaite/game/agent/rewards.py index 7d14e097..d75597f0 100644 --- a/src/primaite/game/agent/rewards.py +++ b/src/primaite/game/agent/rewards.py @@ -367,7 +367,7 @@ class ActionPenalty(AbstractReward): Optional Configuration item therefore default value of 0 (?). """ - def __init__(self, agent_name: str, penalty: float = 0): + def __init__(self, agent_name: str, penalty: float): """ Initialise the reward. @@ -382,14 +382,17 @@ class ActionPenalty(AbstractReward): # No penalty for doing nothing at present return 0 else: - return -1 + _LOGGER.info( + f"Blue agent has incurred a penalty of {self.penalty}, for action: {last_action_response.action}" + ) + return self.penalty @classmethod def from_config(cls, config: Dict) -> "ActionPenalty": """Build the ActionPenalty object from config.""" agent_name = config.get("agent_name") - # penalty_value = config.get("ACTION_PENALTY", 0) - return cls(agent_name=agent_name) + penalty_value = config.get("penalty_value", 0) # default to 0 so that no adverse effects. 
+ return cls(agent_name=agent_name, penalty=penalty_value) class RewardFunction: From 7a833afe2d608176eb2c775551b7b1093cab27e8 Mon Sep 17 00:00:00 2001 From: Charlie Crane Date: Wed, 26 Jun 2024 12:20:28 +0100 Subject: [PATCH 3/5] #2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added --- src/primaite/game/agent/rewards.py | 13 +- tests/assets/configs/action_penalty.yaml | 929 ++++++++++++++++++ .../game_layer/test_rewards.py | 66 +- 3 files changed, 999 insertions(+), 9 deletions(-) create mode 100644 tests/assets/configs/action_penalty.yaml diff --git a/src/primaite/game/agent/rewards.py b/src/primaite/game/agent/rewards.py index d75597f0..a0736bb0 100644 --- a/src/primaite/game/agent/rewards.py +++ b/src/primaite/game/agent/rewards.py @@ -361,17 +361,14 @@ class SharedReward(AbstractReward): class ActionPenalty(AbstractReward): - """ - Apply a negative reward when taking any action except DONOTHING. - - Optional Configuration item therefore default value of 0 (?). - """ + """Apply a negative reward when taking any action except DONOTHING.""" def __init__(self, agent_name: str, penalty: float): """ Initialise the reward. - Penalty will default to 0, as this is an optional param. + This negative reward should be applied when the agent in training chooses to take any + action that isn't DONOTHING. 
""" self.agent_name = agent_name self.penalty = penalty @@ -383,7 +380,7 @@ class ActionPenalty(AbstractReward): return 0 else: _LOGGER.info( - f"Blue agent has incurred a penalty of {self.penalty}, for action: {last_action_response.action}" + f"Blue Agent has incurred a penalty of {self.penalty}, for action: {last_action_response.action}" ) return self.penalty @@ -391,7 +388,7 @@ class ActionPenalty(AbstractReward): def from_config(cls, config: Dict) -> "ActionPenalty": """Build the ActionPenalty object from config.""" agent_name = config.get("agent_name") - penalty_value = config.get("penalty_value", 0) # default to 0 so that no adverse effects. + penalty_value = config.get("penalty_value", 0) # default to 0. return cls(agent_name=agent_name, penalty=penalty_value) diff --git a/tests/assets/configs/action_penalty.yaml b/tests/assets/configs/action_penalty.yaml new file mode 100644 index 00000000..4eb562fe --- /dev/null +++ b/tests/assets/configs/action_penalty.yaml @@ -0,0 +1,929 @@ +io_settings: + save_agent_actions: false + save_step_metadata: false + save_pcap_logs: false + save_sys_logs: false + + +game: + max_episode_length: 256 + ports: + - HTTP + - POSTGRES_SERVER + protocols: + - ICMP + - TCP + - UDP + thresholds: + nmne: + high: 10 + medium: 5 + low: 0 + +agents: + - ref: client_2_green_user + team: GREEN + type: ProbabilisticAgent + agent_settings: + action_probabilities: + 0: 0.3 + 1: 0.6 + 2: 0.1 + observation_space: null + action_space: + action_list: + - type: DONOTHING + - type: NODE_APPLICATION_EXECUTE + options: + nodes: + - node_name: client_2 + applications: + - application_name: WebBrowser + - application_name: DatabaseClient + max_folders_per_node: 1 + max_files_per_folder: 1 + max_services_per_node: 1 + max_applications_per_node: 2 + action_map: + 0: + action: DONOTHING + options: {} + 1: + action: NODE_APPLICATION_EXECUTE + options: + node_id: 0 + application_id: 0 + 2: + action: NODE_APPLICATION_EXECUTE + options: + node_id: 0 + 
application_id: 1 + + reward_function: + reward_components: + - type: WEBPAGE_UNAVAILABLE_PENALTY + weight: 0.25 + options: + node_hostname: client_2 + - type: GREEN_ADMIN_DATABASE_UNREACHABLE_PENALTY + weight: 0.05 + options: + node_hostname: client_2 + + - ref: client_1_green_user + team: GREEN + type: ProbabilisticAgent + agent_settings: + action_probabilities: + 0: 0.3 + 1: 0.6 + 2: 0.1 + observation_space: null + action_space: + action_list: + - type: DONOTHING + - type: NODE_APPLICATION_EXECUTE + options: + nodes: + - node_name: client_1 + applications: + - application_name: WebBrowser + - application_name: DatabaseClient + max_folders_per_node: 1 + max_files_per_folder: 1 + max_services_per_node: 1 + max_applications_per_node: 2 + action_map: + 0: + action: DONOTHING + options: {} + 1: + action: NODE_APPLICATION_EXECUTE + options: + node_id: 0 + application_id: 0 + 2: + action: NODE_APPLICATION_EXECUTE + options: + node_id: 0 + application_id: 1 + + reward_function: + reward_components: + - type: WEBPAGE_UNAVAILABLE_PENALTY + weight: 0.25 + options: + node_hostname: client_1 + - type: GREEN_ADMIN_DATABASE_UNREACHABLE_PENALTY + weight: 0.05 + options: + node_hostname: client_1 + + - ref: data_manipulation_attacker + team: RED + type: RedDatabaseCorruptingAgent + + observation_space: null + + action_space: + action_list: + - type: DONOTHING + - type: NODE_APPLICATION_EXECUTE + options: + nodes: + - node_name: client_1 + applications: + - application_name: DataManipulationBot + - node_name: client_2 + applications: + - application_name: DataManipulationBot + max_folders_per_node: 1 + max_files_per_folder: 1 + max_services_per_node: 1 + + reward_function: + reward_components: + - type: DUMMY + + agent_settings: # options specific to this particular agent type, basically args of __init__(self) + start_settings: + start_step: 25 + frequency: 20 + variance: 5 + + - ref: defender + team: BLUE + type: ProxyAgent + + observation_space: + type: CUSTOM + options: + 
components: + - type: NODES + label: NODES + options: + hosts: + - hostname: domain_controller + - hostname: web_server + services: + - service_name: WebServer + - hostname: database_server + folders: + - folder_name: database + files: + - file_name: database.db + - hostname: backup_server + - hostname: security_suite + - hostname: client_1 + - hostname: client_2 + num_services: 1 + num_applications: 0 + num_folders: 1 + num_files: 1 + num_nics: 2 + include_num_access: false + include_nmne: true + routers: + - hostname: router_1 + num_ports: 0 + ip_list: + - 192.168.1.10 + - 192.168.1.12 + - 192.168.1.14 + - 192.168.1.16 + - 192.168.1.110 + - 192.168.10.21 + - 192.168.10.22 + - 192.168.10.110 + wildcard_list: + - 0.0.0.1 + port_list: + - 80 + - 5432 + protocol_list: + - ICMP + - TCP + - UDP + num_rules: 10 + + - type: LINKS + label: LINKS + options: + link_references: + - router_1:eth-1<->switch_1:eth-8 + - router_1:eth-2<->switch_2:eth-8 + - switch_1:eth-1<->domain_controller:eth-1 + - switch_1:eth-2<->web_server:eth-1 + - switch_1:eth-3<->database_server:eth-1 + - switch_1:eth-4<->backup_server:eth-1 + - switch_1:eth-7<->security_suite:eth-1 + - switch_2:eth-1<->client_1:eth-1 + - switch_2:eth-2<->client_2:eth-1 + - switch_2:eth-7<->security_suite:eth-2 + - type: "NONE" + label: ICS + options: {} + + action_space: + action_list: + - type: DONOTHING + - type: NODE_SERVICE_SCAN + - type: NODE_SERVICE_STOP + - type: NODE_SERVICE_START + - type: NODE_SERVICE_PAUSE + - type: NODE_SERVICE_RESUME + - type: NODE_SERVICE_RESTART + - type: NODE_SERVICE_DISABLE + - type: NODE_SERVICE_ENABLE + - type: NODE_SERVICE_FIX + - type: NODE_FILE_SCAN + - type: NODE_FILE_CHECKHASH + - type: NODE_FILE_DELETE + - type: NODE_FILE_REPAIR + - type: NODE_FILE_RESTORE + - type: NODE_FOLDER_SCAN + - type: NODE_FOLDER_CHECKHASH + - type: NODE_FOLDER_REPAIR + - type: NODE_FOLDER_RESTORE + - type: NODE_OS_SCAN + - type: NODE_SHUTDOWN + - type: NODE_STARTUP + - type: NODE_RESET + - type: 
ROUTER_ACL_ADDRULE + - type: ROUTER_ACL_REMOVERULE + - type: HOST_NIC_ENABLE + - type: HOST_NIC_DISABLE + + action_map: + 0: + action: DONOTHING + options: {} + # scan webapp service + 1: + action: NODE_SERVICE_SCAN + options: + node_id: 1 + service_id: 0 + # stop webapp service + 2: + action: NODE_SERVICE_STOP + options: + node_id: 1 + service_id: 0 + # start webapp service + 3: + action: "NODE_SERVICE_START" + options: + node_id: 1 + service_id: 0 + 4: + action: "NODE_SERVICE_PAUSE" + options: + node_id: 1 + service_id: 0 + 5: + action: "NODE_SERVICE_RESUME" + options: + node_id: 1 + service_id: 0 + 6: + action: "NODE_SERVICE_RESTART" + options: + node_id: 1 + service_id: 0 + 7: + action: "NODE_SERVICE_DISABLE" + options: + node_id: 1 + service_id: 0 + 8: + action: "NODE_SERVICE_ENABLE" + options: + node_id: 1 + service_id: 0 + 9: # check database.db file + action: "NODE_FILE_SCAN" + options: + node_id: 2 + folder_id: 0 + file_id: 0 + 10: + action: "NODE_FILE_CHECKHASH" + options: + node_id: 2 + folder_id: 0 + file_id: 0 + 11: + action: "NODE_FILE_DELETE" + options: + node_id: 2 + folder_id: 0 + file_id: 0 + 12: + action: "NODE_FILE_REPAIR" + options: + node_id: 2 + folder_id: 0 + file_id: 0 + 13: + action: "NODE_SERVICE_FIX" + options: + node_id: 2 + service_id: 0 + 14: + action: "NODE_FOLDER_SCAN" + options: + node_id: 2 + folder_id: 0 + 15: + action: "NODE_FOLDER_CHECKHASH" + options: + node_id: 2 + folder_id: 0 + 16: + action: "NODE_FOLDER_REPAIR" + options: + node_id: 2 + folder_id: 0 + 17: + action: "NODE_FOLDER_RESTORE" + options: + node_id: 2 + folder_id: 0 + 18: + action: "NODE_OS_SCAN" + options: + node_id: 0 + 19: + action: "NODE_SHUTDOWN" + options: + node_id: 0 + 20: + action: NODE_STARTUP + options: + node_id: 0 + 21: + action: NODE_RESET + options: + node_id: 0 + 22: + action: "NODE_OS_SCAN" + options: + node_id: 1 + 23: + action: "NODE_SHUTDOWN" + options: + node_id: 1 + 24: + action: NODE_STARTUP + options: + node_id: 1 + 25: + action: NODE_RESET 
+ options: + node_id: 1 + 26: # old action num: 18 + action: "NODE_OS_SCAN" + options: + node_id: 2 + 27: + action: "NODE_SHUTDOWN" + options: + node_id: 2 + 28: + action: NODE_STARTUP + options: + node_id: 2 + 29: + action: NODE_RESET + options: + node_id: 2 + 30: + action: "NODE_OS_SCAN" + options: + node_id: 3 + 31: + action: "NODE_SHUTDOWN" + options: + node_id: 3 + 32: + action: NODE_STARTUP + options: + node_id: 3 + 33: + action: NODE_RESET + options: + node_id: 3 + 34: + action: "NODE_OS_SCAN" + options: + node_id: 4 + 35: + action: "NODE_SHUTDOWN" + options: + node_id: 4 + 36: + action: NODE_STARTUP + options: + node_id: 4 + 37: + action: NODE_RESET + options: + node_id: 4 + 38: + action: "NODE_OS_SCAN" + options: + node_id: 5 + 39: # old action num: 19 # shutdown client 1 + action: "NODE_SHUTDOWN" + options: + node_id: 5 + 40: # old action num: 20 + action: NODE_STARTUP + options: + node_id: 5 + 41: # old action num: 21 + action: NODE_RESET + options: + node_id: 5 + 42: + action: "NODE_OS_SCAN" + options: + node_id: 6 + 43: + action: "NODE_SHUTDOWN" + options: + node_id: 6 + 44: + action: NODE_STARTUP + options: + node_id: 6 + 45: + action: NODE_RESET + options: + node_id: 6 + + 46: # old action num: 22 # "ACL: ADDRULE - Block outgoing traffic from client 1" + action: "ROUTER_ACL_ADDRULE" + options: + target_router_nodename: router_1 + position: 1 + permission: 2 + source_ip_id: 7 # client 1 + dest_ip_id: 1 # ALL + source_port_id: 1 + dest_port_id: 1 + protocol_id: 1 + source_wildcard_id: 0 + dest_wildcard_id: 0 + 47: # old action num: 23 # "ACL: ADDRULE - Block outgoing traffic from client 2" + action: "ROUTER_ACL_ADDRULE" + options: + target_router_nodename: router_1 + position: 2 + permission: 2 + source_ip_id: 8 # client 2 + dest_ip_id: 1 # ALL + source_port_id: 1 + dest_port_id: 1 + protocol_id: 1 + source_wildcard_id: 0 + dest_wildcard_id: 0 + 48: # old action num: 24 # block tcp traffic from client 1 to web app + action: "ROUTER_ACL_ADDRULE" + 
options: + target_router_nodename: router_1 + position: 3 + permission: 2 + source_ip_id: 7 # client 1 + dest_ip_id: 3 # web server + source_port_id: 1 + dest_port_id: 1 + protocol_id: 3 + source_wildcard_id: 0 + dest_wildcard_id: 0 + 49: # old action num: 25 # block tcp traffic from client 2 to web app + action: "ROUTER_ACL_ADDRULE" + options: + target_router_nodename: router_1 + position: 4 + permission: 2 + source_ip_id: 8 # client 2 + dest_ip_id: 3 # web server + source_port_id: 1 + dest_port_id: 1 + protocol_id: 3 + source_wildcard_id: 0 + dest_wildcard_id: 0 + 50: # old action num: 26 + action: "ROUTER_ACL_ADDRULE" + options: + target_router_nodename: router_1 + position: 5 + permission: 2 + source_ip_id: 7 # client 1 + dest_ip_id: 4 # database + source_port_id: 1 + dest_port_id: 1 + protocol_id: 3 + source_wildcard_id: 0 + dest_wildcard_id: 0 + 51: # old action num: 27 + action: "ROUTER_ACL_ADDRULE" + options: + target_router_nodename: router_1 + position: 6 + permission: 2 + source_ip_id: 8 # client 2 + dest_ip_id: 4 # database + source_port_id: 1 + dest_port_id: 1 + protocol_id: 3 + source_wildcard_id: 0 + dest_wildcard_id: 0 + 52: # old action num: 28 + action: "ROUTER_ACL_REMOVERULE" + options: + target_router_nodename: router_1 + position: 0 + 53: # old action num: 29 + action: "ROUTER_ACL_REMOVERULE" + options: + target_router_nodename: router_1 + position: 1 + 54: # old action num: 30 + action: "ROUTER_ACL_REMOVERULE" + options: + target_router_nodename: router_1 + position: 2 + 55: # old action num: 31 + action: "ROUTER_ACL_REMOVERULE" + options: + target_router_nodename: router_1 + position: 3 + 56: # old action num: 32 + action: "ROUTER_ACL_REMOVERULE" + options: + target_router_nodename: router_1 + position: 4 + 57: # old action num: 33 + action: "ROUTER_ACL_REMOVERULE" + options: + target_router_nodename: router_1 + position: 5 + 58: # old action num: 34 + action: "ROUTER_ACL_REMOVERULE" + options: + target_router_nodename: router_1 + position: 6 
+ 59: # old action num: 35 + action: "ROUTER_ACL_REMOVERULE" + options: + target_router_nodename: router_1 + position: 7 + 60: # old action num: 36 + action: "ROUTER_ACL_REMOVERULE" + options: + target_router_nodename: router_1 + position: 8 + 61: # old action num: 37 + action: "ROUTER_ACL_REMOVERULE" + options: + target_router_nodename: router_1 + position: 9 + 62: # old action num: 38 + action: "HOST_NIC_DISABLE" + options: + node_id: 0 + nic_id: 0 + 63: # old action num: 39 + action: "HOST_NIC_ENABLE" + options: + node_id: 0 + nic_id: 0 + 64: # old action num: 40 + action: "HOST_NIC_DISABLE" + options: + node_id: 1 + nic_id: 0 + 65: # old action num: 41 + action: "HOST_NIC_ENABLE" + options: + node_id: 1 + nic_id: 0 + 66: # old action num: 42 + action: "HOST_NIC_DISABLE" + options: + node_id: 2 + nic_id: 0 + 67: # old action num: 43 + action: "HOST_NIC_ENABLE" + options: + node_id: 2 + nic_id: 0 + 68: # old action num: 44 + action: "HOST_NIC_DISABLE" + options: + node_id: 3 + nic_id: 0 + 69: # old action num: 45 + action: "HOST_NIC_ENABLE" + options: + node_id: 3 + nic_id: 0 + 70: # old action num: 46 + action: "HOST_NIC_DISABLE" + options: + node_id: 4 + nic_id: 0 + 71: # old action num: 47 + action: "HOST_NIC_ENABLE" + options: + node_id: 4 + nic_id: 0 + 72: # old action num: 48 + action: "HOST_NIC_DISABLE" + options: + node_id: 4 + nic_id: 1 + 73: # old action num: 49 + action: "HOST_NIC_ENABLE" + options: + node_id: 4 + nic_id: 1 + 74: # old action num: 50 + action: "HOST_NIC_DISABLE" + options: + node_id: 5 + nic_id: 0 + 75: # old action num: 51 + action: "HOST_NIC_ENABLE" + options: + node_id: 5 + nic_id: 0 + 76: # old action num: 52 + action: "HOST_NIC_DISABLE" + options: + node_id: 6 + nic_id: 0 + 77: # old action num: 53 + action: "HOST_NIC_ENABLE" + options: + node_id: 6 + nic_id: 0 + + + + options: + nodes: + - node_name: domain_controller + - node_name: web_server + applications: + - application_name: DatabaseClient + services: + - service_name: 
WebServer + - node_name: database_server + folders: + - folder_name: database + files: + - file_name: database.db + services: + - service_name: DatabaseService + - node_name: backup_server + - node_name: security_suite + - node_name: client_1 + - node_name: client_2 + + max_folders_per_node: 2 + max_files_per_folder: 2 + max_services_per_node: 2 + max_nics_per_node: 8 + max_acl_rules: 10 + ip_list: + - 192.168.1.10 + - 192.168.1.12 + - 192.168.1.14 + - 192.168.1.16 + - 192.168.1.110 + - 192.168.10.21 + - 192.168.10.22 + - 192.168.10.110 + + + reward_function: + reward_components: + - type: SHARED_REWARD + weight: 1.0 + options: + agent_name: client_1_green_user + - type: SHARED_REWARD + weight: 1.0 + options: + agent_name: client_2_green_user + - type: ACTION_PENALTY + weight: 1.0 + options: + agent_name: defender + penalty_value: -1 + + + agent_settings: + flatten_obs: true + + + +simulation: + network: + nmne_config: + capture_nmne: true + nmne_capture_keywords: + - DELETE + nodes: + + - hostname: router_1 + type: router + num_ports: 5 + ports: + 1: + ip_address: 192.168.1.1 + subnet_mask: 255.255.255.0 + 2: + ip_address: 192.168.10.1 + subnet_mask: 255.255.255.0 + acl: + 18: + action: PERMIT + src_port: POSTGRES_SERVER + dst_port: POSTGRES_SERVER + 19: + action: PERMIT + src_port: DNS + dst_port: DNS + 20: + action: PERMIT + src_port: FTP + dst_port: FTP + 21: + action: PERMIT + src_port: HTTP + dst_port: HTTP + 22: + action: PERMIT + src_port: ARP + dst_port: ARP + 23: + action: PERMIT + protocol: ICMP + + - hostname: switch_1 + type: switch + num_ports: 8 + + - hostname: switch_2 + type: switch + num_ports: 8 + + - hostname: domain_controller + type: server + ip_address: 192.168.1.10 + subnet_mask: 255.255.255.0 + default_gateway: 192.168.1.1 + services: + - type: DNSServer + options: + domain_mapping: + arcd.com: 192.168.1.12 # web server + + - hostname: web_server + type: server + ip_address: 192.168.1.12 + subnet_mask: 255.255.255.0 + default_gateway: 
192.168.1.1 + dns_server: 192.168.1.10 + services: + - type: WebServer + applications: + - type: DatabaseClient + options: + db_server_ip: 192.168.1.14 + + + - hostname: database_server + type: server + ip_address: 192.168.1.14 + subnet_mask: 255.255.255.0 + default_gateway: 192.168.1.1 + dns_server: 192.168.1.10 + services: + - type: DatabaseService + options: + backup_server_ip: 192.168.1.16 + - type: FTPClient + + - hostname: backup_server + type: server + ip_address: 192.168.1.16 + subnet_mask: 255.255.255.0 + default_gateway: 192.168.1.1 + dns_server: 192.168.1.10 + services: + - type: FTPServer + + - hostname: security_suite + type: server + ip_address: 192.168.1.110 + subnet_mask: 255.255.255.0 + default_gateway: 192.168.1.1 + dns_server: 192.168.1.10 + network_interfaces: + 2: # unfortunately this number is currently meaningless, they're just added in order and take up the next available slot + ip_address: 192.168.10.110 + subnet_mask: 255.255.255.0 + + - hostname: client_1 + type: computer + ip_address: 192.168.10.21 + subnet_mask: 255.255.255.0 + default_gateway: 192.168.10.1 + dns_server: 192.168.1.10 + applications: + - type: DataManipulationBot + options: + port_scan_p_of_success: 0.8 + data_manipulation_p_of_success: 0.8 + payload: "DELETE" + server_ip: 192.168.1.14 + - type: WebBrowser + options: + target_url: http://arcd.com/users/ + - type: DatabaseClient + options: + db_server_ip: 192.168.1.14 + services: + - type: DNSClient + + - hostname: client_2 + type: computer + ip_address: 192.168.10.22 + subnet_mask: 255.255.255.0 + default_gateway: 192.168.10.1 + dns_server: 192.168.1.10 + applications: + - type: WebBrowser + options: + target_url: http://arcd.com/users/ + - type: DataManipulationBot + options: + port_scan_p_of_success: 0.8 + data_manipulation_p_of_success: 0.8 + payload: "DELETE" + server_ip: 192.168.1.14 + - type: DatabaseClient + options: + db_server_ip: 192.168.1.14 + services: + - type: DNSClient + + + + links: + - 
endpoint_a_hostname: router_1 + endpoint_a_port: 1 + endpoint_b_hostname: switch_1 + endpoint_b_port: 8 + - endpoint_a_hostname: router_1 + endpoint_a_port: 2 + endpoint_b_hostname: switch_2 + endpoint_b_port: 8 + - endpoint_a_hostname: switch_1 + endpoint_a_port: 1 + endpoint_b_hostname: domain_controller + endpoint_b_port: 1 + - endpoint_a_hostname: switch_1 + endpoint_a_port: 2 + endpoint_b_hostname: web_server + endpoint_b_port: 1 + - endpoint_a_hostname: switch_1 + endpoint_a_port: 3 + endpoint_b_hostname: database_server + endpoint_b_port: 1 + - endpoint_a_hostname: switch_1 + endpoint_a_port: 4 + endpoint_b_hostname: backup_server + endpoint_b_port: 1 + - endpoint_a_hostname: switch_1 + endpoint_a_port: 7 + endpoint_b_hostname: security_suite + endpoint_b_port: 1 + - endpoint_a_hostname: switch_2 + endpoint_a_port: 1 + endpoint_b_hostname: client_1 + endpoint_b_port: 1 + - endpoint_a_hostname: switch_2 + endpoint_a_port: 2 + endpoint_b_hostname: client_2 + endpoint_b_port: 1 + - endpoint_a_hostname: switch_2 + endpoint_a_port: 7 + endpoint_b_hostname: security_suite + endpoint_b_port: 2 diff --git a/tests/integration_tests/game_layer/test_rewards.py b/tests/integration_tests/game_layer/test_rewards.py index db2b0c3a..95e70271 100644 --- a/tests/integration_tests/game_layer/test_rewards.py +++ b/tests/integration_tests/game_layer/test_rewards.py @@ -2,7 +2,7 @@ import yaml from primaite.game.agent.interface import AgentHistoryItem -from primaite.game.agent.rewards import GreenAdminDatabaseUnreachablePenalty, WebpageUnavailablePenalty +from primaite.game.agent.rewards import ActionPenalty, GreenAdminDatabaseUnreachablePenalty, WebpageUnavailablePenalty from primaite.game.game import PrimaiteGame from primaite.session.environment import PrimaiteGymEnv from primaite.simulator.network.hardware.nodes.host.server import Server @@ -119,3 +119,67 @@ def test_shared_reward(): g2_reward = env.game.agents["client_2_green_user"].reward_function.current_reward blue_reward 
= env.game.agents["defender"].reward_function.current_reward assert blue_reward == g1_reward + g2_reward + + +def test_action_penalty_loads_from_config(): + """Test to ensure that action penalty is correctly loaded from config into PrimaiteGymEnv""" + CFG_PATH = TEST_ASSETS_ROOT / "configs/action_penalty.yaml" + with open(CFG_PATH, "r") as f: + cfg = yaml.safe_load(f) + + env = PrimaiteGymEnv(env_config=cfg) + + env.reset() + + ActionPenalty_Value = env.game.agents["defender"].reward_function.reward_components[2][0].penalty + CFG_Penalty_Value = cfg["agents"][3]["reward_function"]["reward_components"][2]["options"]["penalty_value"] + + assert ActionPenalty_Value == CFG_Penalty_Value + + +def test_action_penalty(game_and_agent): + """Test that the action penalty is correctly applied when agent performs any action""" + + # Create an ActionPenalty Reward + Penalty = ActionPenalty(agent_name="Test_Blue_Agent", penalty=-1.0) + + game, _ = game_and_agent + + server_1: Server = game.simulation.network.get_node_by_hostname("server_1") + server_1.software_manager.install(DatabaseService) + db_service = server_1.software_manager.software.get("DatabaseService") + db_service.start() + + client_1 = game.simulation.network.get_node_by_hostname("client_1") + client_1.software_manager.install(DatabaseClient) + db_client: DatabaseClient = client_1.software_manager.software.get("DatabaseClient") + db_client.configure(server_ip_address=server_1.network_interface[1].ip_address) + db_client.run() + + response = db_client.apply_request( + [ + "execute", + ] + ) + + state = game.get_sim_state() + + # Assert that penalty is applied if action isn't DONOTHING + reward_value = Penalty.calculate( + state, + last_action_response=AgentHistoryItem( + timestep=0, action="NODE_APPLICATION_EXECUTE", parameters={}, request=["execute"], response=response + ), + ) + + assert reward_value == -1.0 + + # Assert that no penalty applied for a DONOTHING action + reward_value = Penalty.calculate( + state, + 
last_action_response=AgentHistoryItem( + timestep=0, action="DONOTHING", parameters={}, request=["execute"], response=response + ), + ) + + assert reward_value == 0 From e204afff6f1c6526356cfd7d76d367b5361df6f0 Mon Sep 17 00:00:00 2001 From: Charlie Crane Date: Wed, 26 Jun 2024 20:58:52 +0100 Subject: [PATCH 4/5] #2656 - Removing the change to Data_Manipulation.yaml as this isn't necessary --- src/primaite/config/_package_data/data_manipulation.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/primaite/config/_package_data/data_manipulation.yaml b/src/primaite/config/_package_data/data_manipulation.yaml index be613918..f320c22f 100644 --- a/src/primaite/config/_package_data/data_manipulation.yaml +++ b/src/primaite/config/_package_data/data_manipulation.yaml @@ -739,12 +739,6 @@ agents: options: agent_name: client_2_green_user - - type: ACTION_PENALTY - weight: 1.0 - options: - agent_name: defender - penalty_value: -1 - agent_settings: flatten_obs: true From 7a680678aa4e69355f1c2a11bf2c8157f2bae321 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Thu, 27 Jun 2024 12:01:32 +0100 Subject: [PATCH 5/5] #2656 - Make action penalty more configurable --- src/primaite/game/agent/rewards.py | 28 ++-- tests/assets/configs/action_penalty.yaml | 141 +----------------- .../game_layer/test_rewards.py | 80 +++++----- 3 files changed, 62 insertions(+), 187 deletions(-) diff --git a/src/primaite/game/agent/rewards.py b/src/primaite/game/agent/rewards.py index a0736bb0..4a17e9a5 100644 --- a/src/primaite/game/agent/rewards.py +++ b/src/primaite/game/agent/rewards.py @@ -363,33 +363,33 @@ class SharedReward(AbstractReward): class ActionPenalty(AbstractReward): """Apply a negative reward when taking any action except DONOTHING.""" - def __init__(self, agent_name: str, penalty: float): + def __init__(self, action_penalty: float, do_nothing_penalty: float) -> None: """ Initialise the reward. 
- This negative reward should be applied when the agent in training chooses to take any - action that isn't DONOTHING. + Reward or penalise agents for doing nothing or taking actions. + + :param action_penalty: Reward to give agents for taking any action except DONOTHING + :type action_penalty: float + :param do_nothing_penalty: Reward to give agent for taking the DONOTHING action + :type do_nothing_penalty: float """ - self.agent_name = agent_name - self.penalty = penalty + self.action_penalty = action_penalty + self.do_nothing_penalty = do_nothing_penalty def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float: """Calculate the penalty to be applied.""" if last_action_response.action == "DONOTHING": - # No penalty for doing nothing at present - return 0 + return self.do_nothing_penalty else: - _LOGGER.info( - f"Blue Agent has incurred a penalty of {self.penalty}, for action: {last_action_response.action}" - ) - return self.penalty + return self.action_penalty @classmethod def from_config(cls, config: Dict) -> "ActionPenalty": """Build the ActionPenalty object from config.""" - agent_name = config.get("agent_name") - penalty_value = config.get("penalty_value", 0) # default to 0. 
- return cls(agent_name=agent_name, penalty=penalty_value) + action_penalty = config.get("action_penalty", -1.0) + do_nothing_penalty = config.get("do_nothing_penalty", 0.0) + return cls(action_penalty=action_penalty, do_nothing_penalty=do_nothing_penalty) class RewardFunction: diff --git a/tests/assets/configs/action_penalty.yaml b/tests/assets/configs/action_penalty.yaml index 4eb562fe..1771ba5f 100644 --- a/tests/assets/configs/action_penalty.yaml +++ b/tests/assets/configs/action_penalty.yaml @@ -21,135 +21,6 @@ game: low: 0 agents: - - ref: client_2_green_user - team: GREEN - type: ProbabilisticAgent - agent_settings: - action_probabilities: - 0: 0.3 - 1: 0.6 - 2: 0.1 - observation_space: null - action_space: - action_list: - - type: DONOTHING - - type: NODE_APPLICATION_EXECUTE - options: - nodes: - - node_name: client_2 - applications: - - application_name: WebBrowser - - application_name: DatabaseClient - max_folders_per_node: 1 - max_files_per_folder: 1 - max_services_per_node: 1 - max_applications_per_node: 2 - action_map: - 0: - action: DONOTHING - options: {} - 1: - action: NODE_APPLICATION_EXECUTE - options: - node_id: 0 - application_id: 0 - 2: - action: NODE_APPLICATION_EXECUTE - options: - node_id: 0 - application_id: 1 - - reward_function: - reward_components: - - type: WEBPAGE_UNAVAILABLE_PENALTY - weight: 0.25 - options: - node_hostname: client_2 - - type: GREEN_ADMIN_DATABASE_UNREACHABLE_PENALTY - weight: 0.05 - options: - node_hostname: client_2 - - - ref: client_1_green_user - team: GREEN - type: ProbabilisticAgent - agent_settings: - action_probabilities: - 0: 0.3 - 1: 0.6 - 2: 0.1 - observation_space: null - action_space: - action_list: - - type: DONOTHING - - type: NODE_APPLICATION_EXECUTE - options: - nodes: - - node_name: client_1 - applications: - - application_name: WebBrowser - - application_name: DatabaseClient - max_folders_per_node: 1 - max_files_per_folder: 1 - max_services_per_node: 1 - max_applications_per_node: 2 - action_map: - 
0: - action: DONOTHING - options: {} - 1: - action: NODE_APPLICATION_EXECUTE - options: - node_id: 0 - application_id: 0 - 2: - action: NODE_APPLICATION_EXECUTE - options: - node_id: 0 - application_id: 1 - - reward_function: - reward_components: - - type: WEBPAGE_UNAVAILABLE_PENALTY - weight: 0.25 - options: - node_hostname: client_1 - - type: GREEN_ADMIN_DATABASE_UNREACHABLE_PENALTY - weight: 0.05 - options: - node_hostname: client_1 - - - ref: data_manipulation_attacker - team: RED - type: RedDatabaseCorruptingAgent - - observation_space: null - - action_space: - action_list: - - type: DONOTHING - - type: NODE_APPLICATION_EXECUTE - options: - nodes: - - node_name: client_1 - applications: - - application_name: DataManipulationBot - - node_name: client_2 - applications: - - application_name: DataManipulationBot - max_folders_per_node: 1 - max_files_per_folder: 1 - max_services_per_node: 1 - - reward_function: - reward_components: - - type: DUMMY - - agent_settings: # options specific to this particular agent type, basically args of __init__(self) - start_settings: - start_step: 25 - frequency: 20 - variance: 5 - ref: defender team: BLUE @@ -712,19 +583,11 @@ agents: reward_function: reward_components: - - type: SHARED_REWARD - weight: 1.0 - options: - agent_name: client_1_green_user - - type: SHARED_REWARD - weight: 1.0 - options: - agent_name: client_2_green_user - type: ACTION_PENALTY weight: 1.0 options: - agent_name: defender - penalty_value: -1 + action_penalty: -0.75 + do_nothing_penalty: 0.125 agent_settings: diff --git a/tests/integration_tests/game_layer/test_rewards.py b/tests/integration_tests/game_layer/test_rewards.py index 95e70271..2bf551c8 100644 --- a/tests/integration_tests/game_layer/test_rewards.py +++ b/tests/integration_tests/game_layer/test_rewards.py @@ -1,9 +1,11 @@ # © Crown-owned copyright 2024, Defence Science and Technology Laboratory UK +import pytest import yaml from primaite.game.agent.interface import AgentHistoryItem from 
primaite.game.agent.rewards import ActionPenalty, GreenAdminDatabaseUnreachablePenalty, WebpageUnavailablePenalty from primaite.game.game import PrimaiteGame +from primaite.interface.request import RequestResponse from primaite.session.environment import PrimaiteGymEnv from primaite.simulator.network.hardware.nodes.host.server import Server from primaite.simulator.network.hardware.nodes.network.router import ACLAction, Router @@ -130,56 +132,66 @@ def test_action_penalty_loads_from_config(): env = PrimaiteGymEnv(env_config=cfg) env.reset() - - ActionPenalty_Value = env.game.agents["defender"].reward_function.reward_components[2][0].penalty - CFG_Penalty_Value = cfg["agents"][3]["reward_function"]["reward_components"][2]["options"]["penalty_value"] - - assert ActionPenalty_Value == CFG_Penalty_Value + defender = env.game.agents["defender"] + act_penalty_obj = None + for comp in defender.reward_function.reward_components: + if isinstance(comp[0], ActionPenalty): + act_penalty_obj = comp[0] + if act_penalty_obj is None: + pytest.fail("Action penalty reward component was not added to the agent from config.") + assert act_penalty_obj.action_penalty == -0.75 + assert act_penalty_obj.do_nothing_penalty == 0.125 -def test_action_penalty(game_and_agent): +def test_action_penalty(): """Test that the action penalty is correctly applied when agent performs any action""" # Create an ActionPenalty Reward - Penalty = ActionPenalty(agent_name="Test_Blue_Agent", penalty=-1.0) - - game, _ = game_and_agent - - server_1: Server = game.simulation.network.get_node_by_hostname("server_1") - server_1.software_manager.install(DatabaseService) - db_service = server_1.software_manager.software.get("DatabaseService") - db_service.start() - - client_1 = game.simulation.network.get_node_by_hostname("client_1") - client_1.software_manager.install(DatabaseClient) - db_client: DatabaseClient = client_1.software_manager.software.get("DatabaseClient") - 
db_client.configure(server_ip_address=server_1.network_interface[1].ip_address) - db_client.run() - - response = db_client.apply_request( - [ - "execute", - ] - ) - - state = game.get_sim_state() + Penalty = ActionPenalty(action_penalty=-0.75, do_nothing_penalty=0.125) # Assert that penalty is applied if action isn't DONOTHING reward_value = Penalty.calculate( - state, + state={}, last_action_response=AgentHistoryItem( - timestep=0, action="NODE_APPLICATION_EXECUTE", parameters={}, request=["execute"], response=response + timestep=0, + action="NODE_APPLICATION_EXECUTE", + parameters={"node_id": 0, "application_id": 1}, + request=["execute"], + response=RequestResponse.from_bool(True), ), ) - assert reward_value == -1.0 + assert reward_value == -0.75 # Assert that no penalty applied for a DONOTHING action reward_value = Penalty.calculate( - state, + state={}, last_action_response=AgentHistoryItem( - timestep=0, action="DONOTHING", parameters={}, request=["execute"], response=response + timestep=0, + action="DONOTHING", + parameters={}, + request=["do_nothing"], + response=RequestResponse.from_bool(True), ), ) - assert reward_value == 0 + assert reward_value == 0.125 + + +def test_action_penalty_e2e(game_and_agent): + """Test that the action penalty reward is applied correctly when stepping the game.""" + game, agent = game_and_agent + agent: ControlledAgent + comp = ActionPenalty(action_penalty=-0.75, do_nothing_penalty=0.125) + + agent.reward_function.register_component(comp, 1.0) + + action = ("DONOTHING", {}) + agent.store_action(action) + game.step() + assert agent.reward_function.current_reward == 0.125 + + action = ("NODE_FILE_SCAN", {"node_id": 0, "folder_id": 0, "file_id": 0}) + agent.store_action(action) + game.step() + assert agent.reward_function.current_reward == -0.75