From 47ed585ee2e4cbc2acbb962bf57fca8f9a31257b Mon Sep 17 00:00:00 2001
From: Charlie Crane <Charlie.Crane@t-s.qinetiq.com>
Date: Thu, 12 Dec 2024 16:08:11 +0000
Subject: [PATCH] #2912 - Replace DONOTHING reference with do_nothing, tweaks
 following milpac actions

---
 src/primaite/_legacy/actions.py               | 20 +++++++++++--------
 src/primaite/game/agent/actions/manager.py    |  5 ++++-
 src/primaite/game/agent/actions/node.py       |  3 ++-
 src/primaite/game/agent/rewards.py            |  8 ++++----
 .../scripted_agents/data_manipulation_bot.py  |  2 +-
 .../agent/scripted_agents/random_agent.py     |  2 +-
 .../game/agent/scripted_agents/tap001.py      |  2 +-
 .../Data-Manipulation-E2E-Demonstration.ipynb | 14 ++++++-------
 .../actions/test_c2_suite_actions.py          |  2 +-
 .../actions/test_node_request_permission.py   |  6 +++---
 .../game_layer/test_RNG_seed.py               |  8 ++++----
 .../game_layer/test_action_mask.py            |  2 +-
 .../game_layer/test_actions.py                |  2 +-
 .../game_layer/test_rewards.py                | 10 +++++-----
 .../_primaite/_game/_agent/test_actions.py    |  6 +++---
 .../_game/_agent/test_sticky_rewards.py       | 16 +++++++--------
 16 files changed, 58 insertions(+), 50 deletions(-)

diff --git a/src/primaite/_legacy/actions.py b/src/primaite/_legacy/actions.py
index 64cbe0cf..0eda7d86 100644
--- a/src/primaite/_legacy/actions.py
+++ b/src/primaite/_legacy/actions.py
@@ -455,11 +455,12 @@ class NodeAbstractAction(AbstractAction):
     Any action which applies to a node and uses node_id as its only parameter can inherit from this base class.
     """
 
-    @abstractmethod
-    def __init__(self, manager: "ActionManager", num_nodes: int, **kwargs) -> None:
-        super().__init__(manager=manager)
-        self.shape: Dict[str, int] = {"node_id": num_nodes}
-        self.verb: str  # define but don't initialise: defends against children classes not defining this
+    config: "NodeAbstractAction.ConfigSchema"
+
+    class ConfigSchema(AbstractAction.ConfigSchema):
+        """Configuration schema for NodeAbstractAction."""
+
+        verb: str = "Node_Abstract_Action"
 
     def form_request(self, node_id: int) -> RequestFormat:
         """Return the action formatted as a request which can be ingested by the PrimAITE simulation."""
@@ -478,9 +479,12 @@ class NodeOSScanAction(NodeAbstractAction):
 class NodeShutdownAction(NodeAbstractAction):
     """Action which shuts down a node."""
 
-    def __init__(self, manager: "ActionManager", num_nodes: int, **kwargs) -> None:
-        super().__init__(manager=manager, num_nodes=num_nodes)
-        self.verb: str = "shutdown"
+    config: "NodeShutdownAction.ConfigSchema"
+
+    class ConfigSchema(NodeAbstractAction.ConfigSchema):
+        """Configuration Schema for NodeShutdownAction."""
+
+        verb: str = "shutdown"
 
 
 class NodeStartupAction(NodeAbstractAction):
diff --git a/src/primaite/game/agent/actions/manager.py b/src/primaite/game/agent/actions/manager.py
index b89704f4..a6a4f5a6 100644
--- a/src/primaite/game/agent/actions/manager.py
+++ b/src/primaite/game/agent/actions/manager.py
@@ -28,7 +28,7 @@ class DoNothingAction(AbstractAction, identifier="do_nothing"):
     """Do Nothing Action."""
 
     class ConfigSchema(AbstractAction.ConfigSchema):
-        """Configuration Schema for DoNothingAction."""
+        """Configuration Schema for DoNothingAction."""
 
         type: str = "do_nothing"
 
@@ -44,6 +44,7 @@ class ActionManager:
     def __init__(
         self,
         actions: List[Dict],  # stores list of actions available to agent
+        nodes: List[Dict],  # extra configuration for each node
         act_map: Optional[
             Dict[int, Dict]
         ] = None,  # allows restricting set of possible actions - TODO: Refactor to be a list?
@@ -79,6 +80,8 @@ class ActionManager:
             self.action_map = {i: (a["action"], a["options"]) for i, a in act_map.items()}
         # make sure all numbers between 0 and N are represented as dict keys in action map
         assert all([i in self.action_map.keys() for i in range(len(self.action_map))])
+        self.node_names: List[str] = [n["node_name"] for n in nodes]
+        """List of node names in this action space. The list order is the mapping between node index and node name."""
 
     def get_action(self, action: int) -> Tuple[str, Dict]:
         """Produce action in CAOS format."""
diff --git a/src/primaite/game/agent/actions/node.py b/src/primaite/game/agent/actions/node.py
index 4ecc1393..480cb8da 100644
--- a/src/primaite/game/agent/actions/node.py
+++ b/src/primaite/game/agent/actions/node.py
@@ -34,7 +34,8 @@ class NodeAbstractAction(AbstractAction, identifier="node_abstract"):
     @classmethod
     def form_request(cls, config: ConfigSchema) -> RequestFormat:
         """Return the action formatted as a request which can be ingested by the PrimAITE simulation."""
-        return ["network", "node", config.node_name, cls.config.verb]
+        # The verb is read from the supplied config rather than from the class.
+        return ["network", "node", config.node_name, config.verb]
 
 
 class NodeOSScanAction(NodeAbstractAction, identifier="node_os_scan"):
diff --git a/src/primaite/game/agent/rewards.py b/src/primaite/game/agent/rewards.py
index 1de34b40..f528c851 100644
--- a/src/primaite/game/agent/rewards.py
+++ b/src/primaite/game/agent/rewards.py
@@ -447,7 +447,7 @@ class SharedReward(AbstractReward):
 
 
 class ActionPenalty(AbstractReward):
-    """Apply a negative reward when taking any action except DONOTHING."""
+    """Apply a negative reward when taking any action except do_nothing."""
 
     def __init__(self, action_penalty: float, do_nothing_penalty: float) -> None:
         """
@@ -455,9 +455,9 @@ class ActionPenalty(AbstractReward):
 
         Reward or penalise agents for doing nothing or taking actions.
 
-        :param action_penalty: Reward to give agents for taking any action except DONOTHING
+        :param action_penalty: Reward to give agents for taking any action except do_nothing
         :type action_penalty: float
-        :param do_nothing_penalty: Reward to give agent for taking the DONOTHING action
+        :param do_nothing_penalty: Reward to give agent for taking the do_nothing action
         :type do_nothing_penalty: float
         """
         self.action_penalty = action_penalty
@@ -473,7 +473,7 @@ class ActionPenalty(AbstractReward):
         :return: Reward value
         :rtype: float
         """
-        if last_action_response.action == "DONOTHING":
+        if last_action_response.action == "do_nothing":
             return self.do_nothing_penalty
         else:
             return self.action_penalty
diff --git a/src/primaite/game/agent/scripted_agents/data_manipulation_bot.py b/src/primaite/game/agent/scripted_agents/data_manipulation_bot.py
index 129fac1a..c245d687 100644
--- a/src/primaite/game/agent/scripted_agents/data_manipulation_bot.py
+++ b/src/primaite/game/agent/scripted_agents/data_manipulation_bot.py
@@ -39,7 +39,7 @@ class DataManipulationAgent(AbstractScriptedAgent):
         """
         if timestep < self.next_execution_timestep:
             self.logger.debug(msg="Performing do NOTHING")
-            return "DONOTHING", {}
+            return "do_nothing", {}
 
         self._set_next_execution_timestep(timestep + self.agent_settings.start_settings.frequency)
         self.logger.info(msg="Performing a data manipulation attack!")
diff --git a/src/primaite/game/agent/scripted_agents/random_agent.py b/src/primaite/game/agent/scripted_agents/random_agent.py
index df9273f7..eade3a0c 100644
--- a/src/primaite/game/agent/scripted_agents/random_agent.py
+++ b/src/primaite/game/agent/scripted_agents/random_agent.py
@@ -81,4 +81,4 @@ class PeriodicAgent(AbstractScriptedAgent):
             self._set_next_execution_timestep(timestep + self.settings.frequency, self.settings.variance)
             return "NODE_APPLICATION_EXECUTE", {"node_id": 0, "application_id": 0}
 
-        return "DONOTHING", {}
+        return "do_nothing", {}
diff --git a/src/primaite/game/agent/scripted_agents/tap001.py b/src/primaite/game/agent/scripted_agents/tap001.py
index c4f6062a..6d370654 100644
--- a/src/primaite/game/agent/scripted_agents/tap001.py
+++ b/src/primaite/game/agent/scripted_agents/tap001.py
@@ -46,7 +46,7 @@ class TAP001(AbstractScriptedAgent):
         :rtype: Tuple[str, Dict]
         """
         if timestep < self.next_execution_timestep:
-            return "DONOTHING", {}
+            return "do_nothing", {}
 
         self._set_next_execution_timestep(timestep + self.agent_settings.start_settings.frequency)
 
diff --git a/src/primaite/notebooks/Data-Manipulation-E2E-Demonstration.ipynb b/src/primaite/notebooks/Data-Manipulation-E2E-Demonstration.ipynb
index 0460f771..89620215 100644
--- a/src/primaite/notebooks/Data-Manipulation-E2E-Demonstration.ipynb
+++ b/src/primaite/notebooks/Data-Manipulation-E2E-Demonstration.ipynb
@@ -165,13 +165,13 @@
     "\n",
     "| node_id | node name        |\n",
     "|---------|------------------|\n",
-    "| 1       | domain_controller|\n",
-    "| 2       | web_server       |\n",
-    "| 3       | database_server  |\n",
-    "| 4       | backup_server    |\n",
-    "| 5       | security_suite   |\n",
-    "| 6       | client_1         |\n",
-    "| 7       | client_2         |\n",
+    "| 0       | domain_controller|\n",
+    "| 1       | web_server       |\n",
+    "| 2       | database_server  |\n",
+    "| 3       | backup_server    |\n",
+    "| 4       | security_suite   |\n",
+    "| 5       | client_1         |\n",
+    "| 6       | client_2         |\n",
     "\n",
     "Service 1 on node 2 (web_server) corresponds to the Web Server service. Other services are only there for padding to ensure that each node's observation space has the same shape. They are filled with zeroes.\n",
     "\n",
diff --git a/tests/integration_tests/game_layer/actions/test_c2_suite_actions.py b/tests/integration_tests/game_layer/actions/test_c2_suite_actions.py
index 187fb1fe..d73c9834 100644
--- a/tests/integration_tests/game_layer/actions/test_c2_suite_actions.py
+++ b/tests/integration_tests/game_layer/actions/test_c2_suite_actions.py
@@ -134,7 +134,7 @@ def test_c2_server_ransomware(game_and_agent_fixture: Tuple[PrimaiteGame, ProxyA
 
     # Stepping a few timesteps to allow for the RansowmareScript to finish installing.
 
-    action = ("DONOTHING", {})
+    action = ("do_nothing", {})
     agent.store_action(action)
     game.step()
     game.step()
diff --git a/tests/integration_tests/game_layer/actions/test_node_request_permission.py b/tests/integration_tests/game_layer/actions/test_node_request_permission.py
index fdf04ad5..c34103bc 100644
--- a/tests/integration_tests/game_layer/actions/test_node_request_permission.py
+++ b/tests/integration_tests/game_layer/actions/test_node_request_permission.py
@@ -36,7 +36,7 @@ def test_node_startup_shutdown(game_and_agent_fixture: Tuple[PrimaiteGame, Proxy
     assert client_1.operating_state == NodeOperatingState.SHUTTING_DOWN
 
     for i in range(client_1.shut_down_duration + 1):
-        action = ("DONOTHING", {"node_id": 0})
+        action = ("do_nothing", {})
         agent.store_action(action)
         game.step()
 
@@ -50,7 +50,7 @@ def test_node_startup_shutdown(game_and_agent_fixture: Tuple[PrimaiteGame, Proxy
     assert client_1.operating_state == NodeOperatingState.BOOTING
 
     for i in range(client_1.start_up_duration + 1):
-        action = ("DONOTHING", {"node_id": 0})
+        action = ("do_nothing", {})
         agent.store_action(action)
         game.step()
 
@@ -80,7 +80,7 @@ def test_node_cannot_be_shut_down_if_node_is_already_off(game_and_agent_fixture:
     client_1.power_off()
 
     for i in range(client_1.shut_down_duration + 1):
-        action = ("DONOTHING", {"node_id": 0})
+        action = ("do_nothing", {})
         agent.store_action(action)
         game.step()
 
diff --git a/tests/integration_tests/game_layer/test_RNG_seed.py b/tests/integration_tests/game_layer/test_RNG_seed.py
index 0c6d567d..e772af32 100644
--- a/tests/integration_tests/game_layer/test_RNG_seed.py
+++ b/tests/integration_tests/game_layer/test_RNG_seed.py
@@ -24,12 +24,12 @@ def test_rng_seed_set(create_env):
     env.reset(seed=3)
     for i in range(100):
         env.step(0)
-    a = [item.timestep for item in env.game.agents["client_2_green_user"].history if item.action != "DONOTHING"]
+    a = [item.timestep for item in env.game.agents["client_2_green_user"].history if item.action != "do_nothing"]
 
     env.reset(seed=3)
     for i in range(100):
         env.step(0)
-    b = [item.timestep for item in env.game.agents["client_2_green_user"].history if item.action != "DONOTHING"]
+    b = [item.timestep for item in env.game.agents["client_2_green_user"].history if item.action != "do_nothing"]
 
     assert a == b
 
@@ -40,11 +40,11 @@ def test_rng_seed_unset(create_env):
     env.reset()
     for i in range(100):
         env.step(0)
-    a = [item.timestep for item in env.game.agents["client_2_green_user"].history if item.action != "DONOTHING"]
+    a = [item.timestep for item in env.game.agents["client_2_green_user"].history if item.action != "do_nothing"]
 
     env.reset()
     for i in range(100):
         env.step(0)
-    b = [item.timestep for item in env.game.agents["client_2_green_user"].history if item.action != "DONOTHING"]
+    b = [item.timestep for item in env.game.agents["client_2_green_user"].history if item.action != "do_nothing"]
 
     assert a != b
diff --git a/tests/integration_tests/game_layer/test_action_mask.py b/tests/integration_tests/game_layer/test_action_mask.py
index 64464724..7a1475c2 100644
--- a/tests/integration_tests/game_layer/test_action_mask.py
+++ b/tests/integration_tests/game_layer/test_action_mask.py
@@ -91,7 +91,7 @@ def test_mask_contents_correct():
             assert mask[action_num]
             node_obj.operating_state = NodeOperatingState.ON
 
-        if act_type == "DONOTHING":
+        if act_type == "do_nothing":
             assert mask[action_num]
 
         if act_type == "NODE_SERVICE_DISABLE":
diff --git a/tests/integration_tests/game_layer/test_actions.py b/tests/integration_tests/game_layer/test_actions.py
index 9fdf029b..859c056c 100644
--- a/tests/integration_tests/game_layer/test_actions.py
+++ b/tests/integration_tests/game_layer/test_actions.py
@@ -32,7 +32,7 @@ FIREWALL_ACTIONS_NETWORK = TEST_ASSETS_ROOT / "configs/firewall_actions_network.
 
 
 def test_do_nothing_integration(game_and_agent: Tuple[PrimaiteGame, ProxyAgent]):
-    """Test that the DoNothingAction can form a request and that it is accepted by the simulation."""
+    """Test that the DoNothingAction can form a request and that it is accepted by the simulation."""
     game, agent = game_and_agent
 
     action = ("do_nothing", {})
diff --git a/tests/integration_tests/game_layer/test_rewards.py b/tests/integration_tests/game_layer/test_rewards.py
index 0005b508..882c0923 100644
--- a/tests/integration_tests/game_layer/test_rewards.py
+++ b/tests/integration_tests/game_layer/test_rewards.py
@@ -31,7 +31,7 @@ def test_WebpageUnavailablePenalty(game_and_agent):
     agent.reward_function.register_component(comp, 0.7)
 
     # Check that before trying to fetch the webpage, the reward is 0.0
-    agent.store_action(("DONOTHING", {}))
+    agent.store_action(("do_nothing", {}))
     game.step()
     assert agent.reward_function.current_reward == 0.0
 
@@ -149,7 +149,7 @@ def test_action_penalty():
     # Create an ActionPenalty Reward
     Penalty = ActionPenalty(action_penalty=-0.75, do_nothing_penalty=0.125)
 
-    # Assert that penalty is applied if action isn't DONOTHING
+    # Assert that penalty is applied if action isn't do_nothing
     reward_value = Penalty.calculate(
         state={},
         last_action_response=AgentHistoryItem(
@@ -163,12 +163,12 @@ def test_action_penalty():
 
     assert reward_value == -0.75
 
-    # Assert that no penalty applied for a DONOTHING action
+    # Assert that no penalty applied for a do_nothing action
     reward_value = Penalty.calculate(
         state={},
         last_action_response=AgentHistoryItem(
             timestep=0,
-            action="DONOTHING",
+            action="do_nothing",
             parameters={},
             request=["do_nothing"],
             response=RequestResponse.from_bool(True),
@@ -186,7 +186,7 @@ def test_action_penalty_e2e(game_and_agent):
 
     agent.reward_function.register_component(comp, 1.0)
 
-    action = ("DONOTHING", {})
+    action = ("do_nothing", {})
     agent.store_action(action)
     game.step()
     assert agent.reward_function.current_reward == 0.125
diff --git a/tests/unit_tests/_primaite/_game/_agent/test_actions.py b/tests/unit_tests/_primaite/_game/_agent/test_actions.py
index c2d31ee1..46963015 100644
--- a/tests/unit_tests/_primaite/_game/_agent/test_actions.py
+++ b/tests/unit_tests/_primaite/_game/_agent/test_actions.py
@@ -5,7 +5,7 @@ import pytest
 
 from primaite.game.agent.actions import (
     ActionManager,
-    DoNothingAction,
+    DoNothingAction,
     NodeServiceDisableAction,
     NodeServiceEnableAction,
     NodeServicePauseAction,
@@ -18,10 +18,10 @@ from primaite.game.agent.actions import (
 
 
 def test_do_nothing_action_form_request():
-    """Test that the DoNothingAction can form a request and that it is correct."""
+    """Test that the DoNothingAction can form a request and that it is correct."""
     manager = Mock()
 
-    action = DoNothingAction(manager=manager)
+    action = DoNothingAction(manager=manager)
 
     request = action.form_request()
 
diff --git a/tests/unit_tests/_primaite/_game/_agent/test_sticky_rewards.py b/tests/unit_tests/_primaite/_game/_agent/test_sticky_rewards.py
index 58f0fcc1..78113f5f 100644
--- a/tests/unit_tests/_primaite/_game/_agent/test_sticky_rewards.py
+++ b/tests/unit_tests/_primaite/_game/_agent/test_sticky_rewards.py
@@ -70,7 +70,7 @@ class TestWebpageUnavailabilitySticky:
         reward = WebpageUnavailablePenalty("computer", sticky=False)
 
         # no response codes yet, reward is 0
-        action, params, request = "DO_NOTHING", {}, ["DONOTHING"]
+        action, params, request = "DO_NOTHING", {}, ["do_nothing"]
         response = RequestResponse(status="success", data={})
         browser_history = []
         state = {"network": {"nodes": {"computer": {"applications": {"WebBrowser": {"history": browser_history}}}}}}
@@ -93,7 +93,7 @@ class TestWebpageUnavailabilitySticky:
 
         # THE IMPORTANT BIT
         # agent did nothing, because reward is not sticky, it goes back to 0
-        action, params, request = "DO_NOTHING", {}, ["DONOTHING"]
+        action, params, request = "DO_NOTHING", {}, ["do_nothing"]
         response = RequestResponse(status="success", data={})
         browser_history = []
         state = {"network": {"nodes": {"computer": {"applications": {"WebBrowser": {"history": browser_history}}}}}}
@@ -130,7 +130,7 @@ class TestWebpageUnavailabilitySticky:
         reward = WebpageUnavailablePenalty("computer", sticky=True)
 
         # no response codes yet, reward is 0
-        action, params, request = "DO_NOTHING", {}, ["DONOTHING"]
+        action, params, request = "DO_NOTHING", {}, ["do_nothing"]
         response = RequestResponse(status="success", data={})
         browser_history = []
         state = {"network": {"nodes": {"computer": {"applications": {"WebBrowser": {"history": browser_history}}}}}}
@@ -153,7 +153,7 @@ class TestWebpageUnavailabilitySticky:
 
         # THE IMPORTANT BIT
         # agent did nothing, because reward is sticky, it stays at 1.0
-        action, params, request = "DO_NOTHING", {}, ["DONOTHING"]
+        action, params, request = "DO_NOTHING", {}, ["do_nothing"]
         response = RequestResponse(status="success", data={})
         state = {"network": {"nodes": {"computer": {"applications": {"WebBrowser": {"history": browser_history}}}}}}
         last_action_response = AgentHistoryItem(
@@ -191,7 +191,7 @@ class TestGreenAdminDatabaseUnreachableSticky:
         reward = GreenAdminDatabaseUnreachablePenalty("computer", sticky=False)
 
         # no response codes yet, reward is 0
-        action, params, request = "DO_NOTHING", {}, ["DONOTHING"]
+        action, params, request = "DO_NOTHING", {}, ["do_nothing"]
         response = RequestResponse(status="success", data={})
         state = {"network": {"nodes": {"computer": {"applications": {"DatabaseClient": {}}}}}}
         last_action_response = AgentHistoryItem(
@@ -212,7 +212,7 @@ class TestGreenAdminDatabaseUnreachableSticky:
 
         # THE IMPORTANT BIT
         # agent did nothing, because reward is not sticky, it goes back to 0
-        action, params, request = "DO_NOTHING", {}, ["DONOTHING"]
+        action, params, request = "DO_NOTHING", {}, ["do_nothing"]
         response = RequestResponse(status="success", data={})
         browser_history = []
         state = {"network": {"nodes": {"computer": {"applications": {"DatabaseClient": {}}}}}}
@@ -247,7 +247,7 @@ class TestGreenAdminDatabaseUnreachableSticky:
         reward = GreenAdminDatabaseUnreachablePenalty("computer", sticky=True)
 
         # no response codes yet, reward is 0
-        action, params, request = "DO_NOTHING", {}, ["DONOTHING"]
+        action, params, request = "DO_NOTHING", {}, ["do_nothing"]
         response = RequestResponse(status="success", data={})
         state = {"network": {"nodes": {"computer": {"applications": {"DatabaseClient": {}}}}}}
         last_action_response = AgentHistoryItem(
@@ -268,7 +268,7 @@ class TestGreenAdminDatabaseUnreachableSticky:
 
         # THE IMPORTANT BIT
         # agent did nothing, because reward is not sticky, it goes back to 0
-        action, params, request = "DO_NOTHING", {}, ["DONOTHING"]
+        action, params, request = "DO_NOTHING", {}, ["do_nothing"]
         response = RequestResponse(status="success", data={})
         state = {"network": {"nodes": {"computer": {"applications": {"DatabaseClient": {}}}}}}
         last_action_response = AgentHistoryItem(