From 95a0669e5c80cfcb4a26a5bc16b634f056d06737 Mon Sep 17 00:00:00 2001 From: Chris McCarthy Date: Thu, 25 May 2023 11:58:54 +0100 Subject: [PATCH 1/2] #1378 - Re-arranged the action step function in the following order: 1. Implement the Blue Action 2. Perform any time-based activities 3. Apply PoL 4. Implement Red Action 5. Calculate reward signal 6. Output Verbose (currently disabled) 7. Update env_obs 8. Add transaction to the list of transactions --- src/primaite/environment/primaite_env.py | 52 +++++++----------------- 1 file changed, 14 insertions(+), 38 deletions(-) diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index 115dd7e6..cde33007 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -343,10 +343,17 @@ class Primaite(Env): # Load the action space into the transaction transaction.set_action_space(copy.deepcopy(action)) - # 1. Perform any time-based activities (e.g. a component moving from patching to good) + # 1. Implement Blue Action + self.interpret_action_and_apply(action) + # Take snapshots of nodes and links + self.nodes_post_red = copy.deepcopy(self.nodes) + self.links_post_red = copy.deepcopy(self.links) + + # 2. Perform any time-based activities (e.g. a component moving from + # patching to good) self.apply_time_based_updates() - # 2. Apply PoL + # 3. Apply PoL apply_node_pol(self.nodes, self.node_pol, self.step_count) # Node PoL apply_iers( self.network, @@ -370,7 +377,7 @@ class Primaite(Env): self.step_count, ) # Network PoL - # 3. Implement Red Action + # 4. Implement Red Action apply_red_agent_iers( self.network, self.nodes, @@ -382,39 +389,8 @@ class Primaite(Env): apply_red_agent_node_pol( self.nodes, self.red_iers, self.red_node_pol, self.step_count ) - # Take snapshots of nodes and links - self.nodes_post_red = copy.deepcopy(self.nodes) - self.links_post_red = copy.deepcopy(self.links) - # 4. Implement Blue Action - self.interpret_action_and_apply(action) - - # 5. Reapply normal and Red agent IER PoL, as we need to see what - # effect the blue agent action has had (if any) on link status - # Need to clear traffic on all links first - for link_key, link_value in self.links.items(): - link_value.clear_traffic() - apply_iers( - self.network, - self.nodes, - self.links, - self.green_iers, - self.acl, - self.step_count, - ) - apply_red_agent_iers( - self.network, - self.nodes, - self.links, - self.red_iers, - self.acl, - self.step_count, - ) - # Take snapshots of nodes and links - self.nodes_post_blue = copy.deepcopy(self.nodes) - self.links_post_blue = copy.deepcopy(self.links) - - # 6. Calculate reward signal (for RL) + # 5. Calculate reward signal (for RL) reward = calculate_reward_function( self.nodes_post_pol, self.nodes_post_blue, @@ -436,15 +412,15 @@ class Primaite(Env): # Load the reward into the transaction transaction.set_reward(reward) - # 7. Output Verbose + # 6. Output Verbose # self.output_link_status() - # 8. Update env_obs + # 7. Update env_obs self.update_environent_obs() # Load the new observation space into the transaction transaction.set_obs_space_post(copy.deepcopy(self.env_obs)) - # 9. Add the transaction to the list of transactions + # 8. Add the transaction to the list of transactions self.transaction_list.append(copy.deepcopy(transaction)) # Return From 7bbdbd69977d5ac50c64fce5638bcf52700beb30 Mon Sep 17 00:00:00 2001 From: Chris McCarthy Date: Thu, 25 May 2023 12:37:42 +0100 Subject: [PATCH 2/2] #1378 - Re-added post blue and snapshots --- src/primaite/environment/primaite_env.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index cde33007..3510db21 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -346,11 +346,10 @@ class Primaite(Env): # 1. Implement Blue Action self.interpret_action_and_apply(action) # Take snapshots of nodes and links - self.nodes_post_red = copy.deepcopy(self.nodes) - self.links_post_red = copy.deepcopy(self.links) + self.nodes_post_blue = copy.deepcopy(self.nodes) + self.links_post_blue = copy.deepcopy(self.links) - # 2. Perform any time-based activities (e.g. a component moving from - # patching to good) + # 2. Perform any time-based activities (e.g. a component moving from patching to good) self.apply_time_based_updates() # 3. Apply PoL @@ -389,6 +388,9 @@ class Primaite(Env): apply_red_agent_node_pol( self.nodes, self.red_iers, self.red_node_pol, self.step_count ) + # Take snapshots of nodes and links + self.nodes_post_red = copy.deepcopy(self.nodes) + self.links_post_red = copy.deepcopy(self.links) # 5. Calculate reward signal (for RL) reward = calculate_reward_function( @@ -409,7 +411,7 @@ class Primaite(Env): # step count is reached in order to prevent neverending episode done = True print("Average reward: " + str(self.average_reward)) - # Load the reward into the transaction + # Load the reward into the transaction transaction.set_reward(reward) # 6. Output Verbose