From 95a0669e5c80cfcb4a26a5bc16b634f056d06737 Mon Sep 17 00:00:00 2001
From: Chris McCarthy <chris.mccarthy@methods.co.uk>
Date: Thu, 25 May 2023 11:58:54 +0100
Subject: [PATCH 1/2] #1378 - Re-arranged the action step function in the
 following order: 1. Implement the Blue Action 2. Perform any time-based
 activities 3. Apply PoL 4. Implement Red Action 5. Calculate reward signal 6.
 Output Verbose (currently disabled) 7. Update env_obs 8. Add transaction to
 the list of transactions

---
 src/primaite/environment/primaite_env.py | 52 +++++++-----------------
 1 file changed, 14 insertions(+), 38 deletions(-)

diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py
index 115dd7e6..cde33007 100644
--- a/src/primaite/environment/primaite_env.py
+++ b/src/primaite/environment/primaite_env.py
@@ -343,10 +343,17 @@ class Primaite(Env):
         # Load the action space into the transaction
         transaction.set_action_space(copy.deepcopy(action))
 
-        # 1. Perform any time-based activities (e.g. a component moving from patching to good)
+        # 1. Implement Blue Action
+        self.interpret_action_and_apply(action)
+        # Take snapshots of nodes and links
+        self.nodes_post_red = copy.deepcopy(self.nodes)
+        self.links_post_red = copy.deepcopy(self.links)
+
+        # 2. Perform any time-based activities (e.g. a component moving from
+        # patching to good)
         self.apply_time_based_updates()
 
-        # 2. Apply PoL
+        # 3. Apply PoL
         apply_node_pol(self.nodes, self.node_pol, self.step_count)  # Node PoL
         apply_iers(
             self.network,
@@ -370,7 +377,7 @@ class Primaite(Env):
             self.step_count,
         )  # Network PoL
 
-        # 3. Implement Red Action
+        # 4. Implement Red Action
         apply_red_agent_iers(
             self.network,
             self.nodes,
@@ -382,39 +389,8 @@ class Primaite(Env):
         apply_red_agent_node_pol(
             self.nodes, self.red_iers, self.red_node_pol, self.step_count
         )
-        # Take snapshots of nodes and links
-        self.nodes_post_red = copy.deepcopy(self.nodes)
-        self.links_post_red = copy.deepcopy(self.links)
 
-        # 4. Implement Blue Action
-        self.interpret_action_and_apply(action)
-
-        # 5. Reapply normal and Red agent IER PoL, as we need to see what
-        # effect the blue agent action has had (if any) on link status
-        # Need to clear traffic on all links first
-        for link_key, link_value in self.links.items():
-            link_value.clear_traffic()
-        apply_iers(
-            self.network,
-            self.nodes,
-            self.links,
-            self.green_iers,
-            self.acl,
-            self.step_count,
-        )
-        apply_red_agent_iers(
-            self.network,
-            self.nodes,
-            self.links,
-            self.red_iers,
-            self.acl,
-            self.step_count,
-        )
-        # Take snapshots of nodes and links
-        self.nodes_post_blue = copy.deepcopy(self.nodes)
-        self.links_post_blue = copy.deepcopy(self.links)
-
-        # 6. Calculate reward signal (for RL)
+        # 5. Calculate reward signal (for RL)
         reward = calculate_reward_function(
             self.nodes_post_pol,
             self.nodes_post_blue,
@@ -436,15 +412,15 @@ class Primaite(Env):
         # Load the reward into the transaction
         transaction.set_reward(reward)
 
-        # 7. Output Verbose
+        # 6. Output Verbose
         # self.output_link_status()
 
-        # 8. Update env_obs
+        # 7. Update env_obs
         self.update_environent_obs()
         # Load the new observation space into the transaction
         transaction.set_obs_space_post(copy.deepcopy(self.env_obs))
 
-        # 9. Add the transaction to the list of transactions
+        # 8. Add the transaction to the list of transactions
         self.transaction_list.append(copy.deepcopy(transaction))
 
         # Return

From 7bbdbd69977d5ac50c64fce5638bcf52700beb30 Mon Sep 17 00:00:00 2001
From: Chris McCarthy <chris.mccarthy@methods.co.uk>
Date: Thu, 25 May 2023 12:37:42 +0100
Subject: [PATCH 2/2] #1378 - Re-added post-blue and post-red snapshots

---
 src/primaite/environment/primaite_env.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py
index cde33007..3510db21 100644
--- a/src/primaite/environment/primaite_env.py
+++ b/src/primaite/environment/primaite_env.py
@@ -346,11 +346,10 @@ class Primaite(Env):
         # 1. Implement Blue Action
         self.interpret_action_and_apply(action)
         # Take snapshots of nodes and links
-        self.nodes_post_red = copy.deepcopy(self.nodes)
-        self.links_post_red = copy.deepcopy(self.links)
+        self.nodes_post_blue = copy.deepcopy(self.nodes)
+        self.links_post_blue = copy.deepcopy(self.links)
 
-        # 2. Perform any time-based activities (e.g. a component moving from
-        # patching to good)
+        # 2. Perform any time-based activities (e.g. a component moving from patching to good)
         self.apply_time_based_updates()
 
         # 3. Apply PoL
@@ -389,6 +388,9 @@ class Primaite(Env):
         apply_red_agent_node_pol(
             self.nodes, self.red_iers, self.red_node_pol, self.step_count
         )
+        # Take snapshots of nodes and links
+        self.nodes_post_red = copy.deepcopy(self.nodes)
+        self.links_post_red = copy.deepcopy(self.links)
 
         # 5. Calculate reward signal (for RL)
         reward = calculate_reward_function(
@@ -409,7 +411,7 @@ class Primaite(Env):
                 # step count is reached in order to prevent neverending episode
                 done = True
             print("Average reward: " + str(self.average_reward))
-        # Load the reward into the transaction
+            # Load the reward into the transaction
         transaction.set_reward(reward)
 
         # 6. Output Verbose