From feead2cd44d712806f6741ea7ed27cc9cdf53a02 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 27 Jun 2023 11:10:21 +0100 Subject: [PATCH 1/6] Fix reference IERs --- src/primaite/environment/primaite_env.py | 18 +++++++++++++++++- src/primaite/environment/reward.py | 10 +++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index da235971..1307a930 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -109,6 +109,7 @@ class Primaite(Env): # Create a dictionary to hold all the green IERs (this will come from an external source) self.green_iers: Dict[str, IER] = {} + self.green_iers_reference: Dict[str, IER] = {} # Create a dictionary to hold all the node PoLs (this will come from an external source) self.node_pol = {} @@ -310,6 +311,9 @@ class Primaite(Env): # Need to clear traffic on all links first for link_key, link_value in self.links.items(): link_value.clear_traffic() + + for link in self.links_reference.values(): + link.clear_traffic() # Create a Transaction (metric) object for this step transaction = Transaction( @@ -348,7 +352,7 @@ class Primaite(Env): self.network_reference, self.nodes_reference, self.links_reference, - self.green_iers, + self.green_iers_reference, self.acl, self.step_count, ) # Network PoL @@ -375,6 +379,7 @@ class Primaite(Env): self.nodes_post_red, self.nodes_reference, self.green_iers, + self.green_iers_reference, self.red_iers, self.step_count, self.training_config, @@ -866,6 +871,17 @@ class Primaite(Env): ier_destination, ier_mission_criticality, ) + self.green_iers_reference[ier_id] = IER( + ier_id, + ier_start_step, + ier_end_step, + ier_load, + ier_protocol, + ier_port, + ier_source, + ier_destination, + ier_mission_criticality, + ) def create_red_ier(self, item): """ diff --git a/src/primaite/environment/reward.py b/src/primaite/environment/reward.py index a620f9b3..777dcf74 100644 --- a/src/primaite/environment/reward.py +++ b/src/primaite/environment/reward.py @@ -6,6 +6,9 @@ from primaite.common.enums import FileSystemState, HardwareState, SoftwareState from primaite.common.service import Service from primaite.nodes.active_node import ActiveNode from primaite.nodes.service_node import ServiceNode +from primaite import getLogger + +_LOGGER = getLogger(__name__) def calculate_reward_function( @@ -13,6 +16,7 @@ def calculate_reward_function( final_nodes, reference_nodes, green_iers, + green_iers_reference, red_iers, step_count, config_values, @@ -68,11 +72,15 @@ def calculate_reward_function( reward_value += config_values.red_ier_running # Go through each green IER - penalise if it's not running (weighted) + # but only if it's supposed to be running (it's running in reference) for ier_key, ier_value in green_iers.items(): + ref_ier = green_iers_reference[ier_key] start_step = ier_value.get_start_step() stop_step = ier_value.get_end_step() if step_count >= start_step and step_count <= stop_step: - if not ier_value.get_is_running(): + if not ier_value.get_is_running() and ref_ier.get_is_running(): + # what should happen if reference IER is blocked but live IER is running? + _LOGGER.debug(f"Applying penalty of {config_values.green_ier_blocked * ier_value.get_mission_criticality()} due to IER {ier_key} being blocked") reward_value += ( config_values.green_ier_blocked * ier_value.get_mission_criticality() From e2d6abf83312eda01c9476f594570971cfd1b068 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 27 Jun 2023 11:20:18 +0100 Subject: [PATCH 2/6] apply pre-commits --- src/primaite/environment/primaite_env.py | 24 +++++++++++------------- src/primaite/environment/reward.py | 10 ++++++++-- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index 1307a930..bdfe00dd 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -14,8 +14,7 @@ from gym import Env, spaces from matplotlib import pyplot as plt from primaite.acl.access_control_list import AccessControlList -from primaite.agents.utils import is_valid_acl_action_extra, \ - is_valid_node_action +from primaite.agents.utils import is_valid_acl_action_extra, is_valid_node_action from primaite.common.custom_typing import NodeUnion from primaite.common.enums import ( ActionType, @@ -24,8 +23,9 @@ from primaite.common.enums import ( NodePOLInitiator, NodePOLType, NodeType, + ObservationType, Priority, - SoftwareState, ObservationType, + SoftwareState, ) from primaite.common.service import Service from primaite.config import training_config @@ -35,15 +35,13 @@ from primaite.environment.reward import calculate_reward_function from primaite.links.link import Link from primaite.nodes.active_node import ActiveNode from primaite.nodes.node import Node -from primaite.nodes.node_state_instruction_green import \ - NodeStateInstructionGreen +from primaite.nodes.node_state_instruction_green import NodeStateInstructionGreen from primaite.nodes.node_state_instruction_red import NodeStateInstructionRed from primaite.nodes.passive_node import PassiveNode from primaite.nodes.service_node import ServiceNode from primaite.pol.green_pol import apply_iers, apply_node_pol from primaite.pol.ier import IER -from primaite.pol.red_agent_pol import apply_red_agent_iers, \ - apply_red_agent_node_pol +from primaite.pol.red_agent_pol import apply_red_agent_iers, apply_red_agent_node_pol from primaite.transactions.transaction import Transaction _LOGGER = logging.getLogger(__name__) @@ -178,7 +176,6 @@ class Primaite(Env): # It will be initialised later. self.obs_handler: ObservationsHandler - # Open the config file and build the environment laydown with open(self._lay_down_config_path, "r") as file: # Open the config file and build the environment laydown @@ -200,7 +197,7 @@ class Primaite(Env): try: plt.tight_layout() nx.draw_networkx(self.network, with_labels=True) - now = datetime.now() # current date and time + # now = datetime.now() # current date and time file_path = session_path / f"network_{timestamp_str}.png" plt.savefig(file_path, format="PNG") @@ -239,7 +236,9 @@ class Primaite(Env): self.action_dict = self.create_node_and_acl_action_dict() self.action_space = spaces.Discrete(len(self.action_dict)) else: - _LOGGER.info(f"Invalid action type selected: {self.training_config.action_type}") + _LOGGER.info( + f"Invalid action type selected: {self.training_config.action_type}" + ) # Set up a csv to store the results of the training try: header = ["Episode", "Average Reward"] @@ -311,7 +310,7 @@ class Primaite(Env): # Need to clear traffic on all links first for link_key, link_value in self.links.items(): link_value.clear_traffic() - + for link in self.links_reference.values(): link.clear_traffic() @@ -384,7 +383,7 @@ class Primaite(Env): self.step_count, self.training_config, ) - #print(f" Step {self.step_count} Reward: {str(reward)}") + # print(f" Step {self.step_count} Reward: {str(reward)}") self.total_reward += reward if self.step_count == self.episode_steps: self.average_reward = self.total_reward / self.step_count @@ -1049,7 +1048,6 @@ class Primaite(Env): """ self.observation_type = ObservationType[observation_info["type"]] - def get_action_info(self, action_info): """ Extracts action_info. diff --git a/src/primaite/environment/reward.py b/src/primaite/environment/reward.py index 777dcf74..f48db259 100644 --- a/src/primaite/environment/reward.py +++ b/src/primaite/environment/reward.py @@ -2,11 +2,11 @@ """Implements reward function.""" from typing import Dict +from primaite import getLogger from primaite.common.enums import FileSystemState, HardwareState, SoftwareState from primaite.common.service import Service from primaite.nodes.active_node import ActiveNode from primaite.nodes.service_node import ServiceNode -from primaite import getLogger _LOGGER = getLogger(__name__) @@ -80,7 +80,13 @@ def calculate_reward_function( if step_count >= start_step and step_count <= stop_step: if not ier_value.get_is_running() and ref_ier.get_is_running(): # what should happen if reference IER is blocked but live IER is running? - _LOGGER.debug(f"Applying penalty of {config_values.green_ier_blocked * ier_value.get_mission_criticality()} due to IER {ier_key} being blocked") + _LOGGER.debug( + ( + f"Applying penalty of " + f"{config_values.green_ier_blocked * ier_value.get_mission_criticality()} " + f"due to IER {ier_key} being blocked" + ) + ) reward_value += ( config_values.green_ier_blocked * ier_value.get_mission_criticality() From ebc0a28460c15b1d6b0c764db3beaad87ce72e8d Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 27 Jun 2023 10:45:45 +0000 Subject: [PATCH 3/6] rename to prevent confusion --- src/primaite/environment/reward.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/primaite/environment/reward.py b/src/primaite/environment/reward.py index f48db259..00ae3528 100644 --- a/src/primaite/environment/reward.py +++ b/src/primaite/environment/reward.py @@ -74,11 +74,11 @@ def calculate_reward_function( # Go through each green IER - penalise if it's not running (weighted) # but only if it's supposed to be running (it's running in reference) for ier_key, ier_value in green_iers.items(): - ref_ier = green_iers_reference[ier_key] + reference_ier = green_iers_reference[ier_key] start_step = ier_value.get_start_step() stop_step = ier_value.get_end_step() if step_count >= start_step and step_count <= stop_step: - if not ier_value.get_is_running() and ref_ier.get_is_running(): + if not ier_value.get_is_running() and reference_ier.get_is_running(): # what should happen if reference IER is blocked but live IER is running? _LOGGER.debug( ( From 79ecb8e0b9c85433c654b59bfaaa164ce97541d8 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 27 Jun 2023 12:44:42 +0100 Subject: [PATCH 4/6] More descriptive debug msg --- src/primaite/environment/reward.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/primaite/environment/reward.py b/src/primaite/environment/reward.py index 00ae3528..0befd547 100644 --- a/src/primaite/environment/reward.py +++ b/src/primaite/environment/reward.py @@ -78,19 +78,23 @@ def calculate_reward_function( start_step = ier_value.get_start_step() stop_step = ier_value.get_end_step() if step_count >= start_step and step_count <= stop_step: - if not ier_value.get_is_running() and reference_ier.get_is_running(): - # what should happen if reference IER is blocked but live IER is running? - _LOGGER.debug( - ( - f"Applying penalty of " - f"{config_values.green_ier_blocked * ier_value.get_mission_criticality()} " - f"due to IER {ier_key} being blocked" + if not ier_value.get_is_running(): + if reference_ier.get_is_running(): + ier_reward = ( + config_values.green_ier_blocked + * ier_value.get_mission_criticality() + ) + _LOGGER.debug( + f"Applying reward of {ier_reward} because IER {ier_key} is blocked" + ) + reward_value += ier_reward + else: + _LOGGER.debug( + ( + f"IER {ier_key} is blocked in the reference and live environments. " + f"Therefore, no penalty was applied." + ) ) - ) - reward_value += ( - config_values.green_ier_blocked - * ier_value.get_mission_criticality() - ) return reward_value From 0bff2d2f36b748207d020c5a2c1443a09b219295 Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 27 Jun 2023 12:56:15 +0100 Subject: [PATCH 5/6] Improve readability --- src/primaite/environment/reward.py | 38 ++++++++++++++++++------------ 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/primaite/environment/reward.py b/src/primaite/environment/reward.py index 0befd547..aa9e4503 100644 --- a/src/primaite/environment/reward.py +++ b/src/primaite/environment/reward.py @@ -78,23 +78,31 @@ def calculate_reward_function( start_step = ier_value.get_start_step() stop_step = ier_value.get_end_step() if step_count >= start_step and step_count <= stop_step: - if not ier_value.get_is_running(): - if reference_ier.get_is_running(): - ier_reward = ( - config_values.green_ier_blocked - * ier_value.get_mission_criticality() + reference_blocked = reference_ier.get_is_running() + live_blocked = ier_value.get_is_running() + ier_reward = ( + config_values.green_ier_blocked * ier_value.get_mission_criticality() + ) + + if live_blocked and not reference_blocked: + _LOGGER.debug( + f"Applying reward of {ier_reward} because IER {ier_key} is blocked" + ) + reward_value += ier_reward + elif live_blocked and reference_blocked: + _LOGGER.debug( + ( + f"IER {ier_key} is blocked in the reference and live environments. " + f"Penalty of {ier_reward} was NOT applied." ) - _LOGGER.debug( - f"Applying reward of {ier_reward} because IER {ier_key} is blocked" - ) - reward_value += ier_reward - else: - _LOGGER.debug( - ( - f"IER {ier_key} is blocked in the reference and live environments. " - f"Therefore, no penalty was applied." - ) + ) + elif not live_blocked and reference_blocked: + _LOGGER.debug( + ( + f"IER {ier_key} is blocked in the reference env but not in the live one. " + f"Penalty of {ier_reward} was NOT applied." ) + ) return reward_value From cffdcdc0d21184f1622e711fb820b368927146df Mon Sep 17 00:00:00 2001 From: Marek Wolan Date: Tue, 27 Jun 2023 15:27:56 +0100 Subject: [PATCH 6/6] Fix ier reward calculation --- src/primaite/environment/reward.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/primaite/environment/reward.py b/src/primaite/environment/reward.py index aa9e4503..1a1a0770 100644 --- a/src/primaite/environment/reward.py +++ b/src/primaite/environment/reward.py @@ -78,8 +78,8 @@ def calculate_reward_function( start_step = ier_value.get_start_step() stop_step = ier_value.get_end_step() if step_count >= start_step and step_count <= stop_step: - reference_blocked = reference_ier.get_is_running() - live_blocked = ier_value.get_is_running() + reference_blocked = not reference_ier.get_is_running() + live_blocked = not ier_value.get_is_running() ier_reward = ( config_values.green_ier_blocked * ier_value.get_mission_criticality() )