Merge branch 'dev' into feature/1386-enable-a-repeatable-or-deterministic-baseline-test

Czar Echavez
2023-07-03 16:56:44 +01:00
16 changed files with 527 additions and 79 deletions


@@ -0,0 +1,77 @@
from datetime import datetime

from primaite.config.lay_down_config import data_manipulation_config_path
from primaite.environment.primaite_env import Primaite
from primaite.nodes.node_state_instruction_red import NodeStateInstructionRed
from tests import TEST_CONFIG_ROOT
from tests.conftest import _get_temp_session_path


def run_generic(env, config_values):
    """Run against a generic agent."""
    # Reset the environment at the start of the episode
    env.reset()
    for episode in range(config_values.num_episodes):
        for step in range(config_values.num_steps):
            # Send the observation space to the agent to get an action
            # TEMP - random action for now
            # action = env.blue_agent_action(obs)
            # action = env.action_space.sample()
            action = 0
            # Run the simulation step on the live environment
            obs, reward, done, info = env.step(action)
            # Break if done is True
            if done:
                break
        # Reset the environment at the end of the episode
        env.reset()
    env.close()


def test_random_red_agent_behaviour():
    """
    Test that the red agent's pattern of life is randomised between runs.

    The environment is built twice with random_red_agent enabled, and the red
    node instructions produced by each run are collected and compared.
    """
    list_of_node_instructions = []
    # Run twice so we can make sure that the red agent is randomised
    for i in range(2):
        # Build a fresh Primaite instance from the test config paths
        session_timestamp: datetime = datetime.now()
        session_path = _get_temp_session_path(session_timestamp)
        timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
        env = Primaite(
            training_config_path=TEST_CONFIG_ROOT
            / "one_node_states_on_off_main_config.yaml",
            lay_down_config_path=data_manipulation_config_path(),
            transaction_list=[],
            session_path=session_path,
            timestamp_str=timestamp_str,
        )
        # Enable the randomised red agent for this run
        env.training_config.random_red_agent = True
        training_config = env.training_config
        training_config.num_steps = env.episode_steps
        run_generic(env, training_config)
        # Add this run's red PoL instructions to the list
        list_of_node_instructions.append(env.red_node_pol)
    # Print the instructions from both runs so any differences are visible in the test log
    for index, instructions in enumerate(list_of_node_instructions):
        for key in instructions:
            instruction: NodeStateInstructionRed = instructions[key]
            print(f"run {index}")
            print(f"{key} start step: {instruction.get_start_step()}")
            print(f"{key} end step: {instruction.get_end_step()}")
            print(f"{key} target node id: {instruction.get_target_node_id()}")
            print("")
    # Compare with != rather than calling __ne__ directly: __ne__ can return
    # NotImplemented, which is truthy and would make the assert pass spuriously
    assert list_of_node_instructions[0] != list_of_node_instructions[1]
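
Aside: the final assert relies on dict inequality, which in turn relies on NodeStateInstructionRed comparing by value. If the class falls back to Python's default identity comparison, two separately built instruction dicts always compare unequal and the assert can never fail. A stricter check can be sketched from the getters the test already prints; instruction_fingerprint below is a hypothetical helper, not part of PrimAITE:

def instruction_fingerprint(instructions):
    """Reduce a dict of NodeStateInstructionRed to plain, comparable tuples."""
    return {
        key: (
            instr.get_start_step(),
            instr.get_end_step(),
            instr.get_target_node_id(),
        )
        for key, instr in instructions.items()
    }

assert instruction_fingerprint(
    list_of_node_instructions[0]
) != instruction_fingerprint(list_of_node_instructions[1])

This compares the values that actually define the red agent's behaviour, so the test fails if two runs happen to produce identical instructions regardless of how the instruction class implements equality.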


@@ -16,17 +16,26 @@ def test_rewards_are_being_penalised_at_each_step_function():
    )
    """
-    On different steps (of the 13 in total) these are the following rewards for config_6 which are activated:
-    File System State: goodShouldBeCorrupt = 5 (between Steps 1 & 3)
-    Hardware State: onShouldBeOff = -2 (between Steps 4 & 6)
-    Service State: goodShouldBeCompromised = 5 (between Steps 7 & 9)
-    Software State: goodShouldBeCompromised = 5 (between Steps 10 & 12)
+    The config 'one_node_states_on_off_lay_down_config.yaml' has 15 steps.
+    On different steps, the laydown config has Patterns of Life (PoLs) which change the state of one of the
+    node's attributes, for example turning the node's file system state to CORRUPT from its original state GOOD.
+    As a result, the following rewards are activated:
+    File System State: corrupt_should_be_good = -10 * 2 (on Steps 1 & 2)
+    Hardware State: off_should_be_on = -10 * 2 (on Steps 4 & 5)
+    Service State: compromised_should_be_good = -20 * 2 (on Steps 7 & 8)
+    Software State: compromised_should_be_good = -20 * 2 (on Steps 10 & 11)
-    Total Reward: -2 - 2 + 5 + 5 + 5 + 5 + 5 + 5 = 26
-    Step Count: 13
-    For the 4 steps where this occurs the average reward is:
-    Average Reward: 2 (26 / 13)
+    Each PoL lasts for 2 steps, so the agent is penalised twice per state change.
+    Note: This test inherits from conftest.py, where the PrimAITE environment is run and the blue agent is
+    hard-coded to do NOTHING on every step.
+    The PoLs change the node's states and show that the agent is penalised on every step where the live
+    network node differs from the reference network node.
+    Total Reward: -10 + -10 + -10 + -10 + -20 + -20 + -20 + -20 = -120
+    Step Count: 15
+    For the 4 state changes above, averaged over all 15 steps:
+    Average Reward: -8 (-120 / 15)
    """
    print("average reward", env.average_reward)
    assert env.average_reward == -8.0
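
As a quick sanity check of the docstring's arithmetic, a few lines of plain Python, independent of the PrimAITE test fixtures; the penalty values and step counts are taken from the updated docstring above, and the variable names are illustrative:

# Per-step penalties: file system, hardware, service, software
penalties = [-10, -10, -20, -20]
pol_duration = 2  # each PoL lasts 2 steps, so each penalty is applied twice
num_steps = 15

total_reward = sum(p * pol_duration for p in penalties)
assert total_reward == -120
assert total_reward / num_steps == -8.0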