From 09412cb43d438ce8394bb24853092c46a28924bc Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Tue, 27 Jun 2023 12:27:57 +0100 Subject: [PATCH 1/2] 1555 - updated doc-string to make test understanding easier --- tests/test_reward.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/test_reward.py b/tests/test_reward.py index c3fcdfc4..56e31ed5 100644 --- a/tests/test_reward.py +++ b/tests/test_reward.py @@ -16,17 +16,25 @@ def test_rewards_are_being_penalised_at_each_step_function(): ) """ - On different steps (of the 13 in total) these are the following rewards for config_6 which are activated: - File System State: goodShouldBeCorrupt = 5 (between Steps 1 & 3) - Hardware State: onShouldBeOff = -2 (between Steps 4 & 6) - Service State: goodShouldBeCompromised = 5 (between Steps 7 & 9) - Software State (Software State): goodShouldBeCompromised = 5 (between Steps 10 & 12) + The config 'one_node_states_on_off_lay_down_config.yaml' has 15 steps: + On different steps, the laydown config has Pattern of Life (PoLs) which change a state of the node's attribute. + For example, turning the nodes' file system state to CORRUPT from its original state GOOD. + As a result these are the following rewards are activated: + File System State: corrupt_should_be_good = -10 * 2 (on Steps 1 = 3) + Hardware State: off_should_be_on = -10 * 2 (on Steps 4 - 6) + Service State: compromised_should_be_good = -20 * 2 (on Steps 7 - 9) + Software State: compromised_should_be_good = -20 * 2 (on Steps 10 - 12) - Total Reward: -2 - 2 + 5 + 5 + 5 + 5 + 5 + 5 = 26 - Step Count: 13 + The Pattern of Life (PoLs) last for 2 steps, so the agent is penalised twice. 
+ + Note: This test run inherits conftest.py where the PrimAITE environment is ran and the blue agent is hard-coded + to do NOTHING on every step so we use Pattern of Lifes (PoLs) to change the nodes states and display that the agent + is being penalised on every step where the live network node differs from the network reference node. + + Total Reward: -10 + -10 + -10 + -10 + -20 + -20 + -20 + -20 = -120 + Step Count: 15 For the 4 steps where this occurs the average reward is: - Average Reward: 2 (26 / 13) + Average Reward: -8 (-120 / 15) """ - print("average reward", env.average_reward) assert env.average_reward == -8.0 From b8a4ede83f89a3d6b2912af8da1f0eb4be3eeb6d Mon Sep 17 00:00:00 2001 From: SunilSamra Date: Tue, 27 Jun 2023 16:59:43 +0100 Subject: [PATCH 2/2] 1555 - added specific steps to doc string --- tests/test_reward.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/test_reward.py b/tests/test_reward.py index 56e31ed5..b8c92274 100644 --- a/tests/test_reward.py +++ b/tests/test_reward.py @@ -20,16 +20,17 @@ def test_rewards_are_being_penalised_at_each_step_function(): On different steps, the laydown config has Pattern of Life (PoLs) which change a state of the node's attribute. For example, turning the nodes' file system state to CORRUPT from its original state GOOD. 
As a result these are the following rewards are activated: - File System State: corrupt_should_be_good = -10 * 2 (on Steps 1 = 3) - Hardware State: off_should_be_on = -10 * 2 (on Steps 4 - 6) - Service State: compromised_should_be_good = -20 * 2 (on Steps 7 - 9) - Software State: compromised_should_be_good = -20 * 2 (on Steps 10 - 12) + File System State: corrupt_should_be_good = -10 * 2 (on Steps 1 & 2) + Hardware State: off_should_be_on = -10 * 2 (on Steps 4 & 5) + Service State: compromised_should_be_good = -20 * 2 (on Steps 7 & 8) + Software State: compromised_should_be_good = -20 * 2 (on Steps 10 & 11) The Pattern of Life (PoLs) last for 2 steps, so the agent is penalised twice. - Note: This test run inherits conftest.py where the PrimAITE environment is ran and the blue agent is hard-coded - to do NOTHING on every step so we use Pattern of Lifes (PoLs) to change the nodes states and display that the agent - is being penalised on every step where the live network node differs from the network reference node. + Note: This test run inherits from conftest.py where the PrimAITE environment is run and the blue agent is hard-coded + to do NOTHING on every step. + We use Patterns of Life (PoLs) to change the nodes' states and display that the agent is being penalised on all steps + where the live network node differs from the network reference node. Total Reward: -10 + -10 + -10 + -10 + -20 + -20 + -20 + -20 = -120 Step Count: 15