From 9623b1450a527b89162c4319998b9fa70681f9a3 Mon Sep 17 00:00:00 2001
From: SunilSamra <sunil-samra@nsc.co.uk>
Date: Tue, 27 Jun 2023 16:59:43 +0100
Subject: [PATCH] 1555 - added specific steps to doc string

---
 tests/test_reward.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/test_reward.py b/tests/test_reward.py
index 56e31ed5..b8c92274 100644
--- a/tests/test_reward.py
+++ b/tests/test_reward.py
@@ -20,16 +20,17 @@ def test_rewards_are_being_penalised_at_each_step_function():
         On different steps, the laydown config has Pattern of Life (PoLs) which change a state of the node's attribute.
         For example, turning the nodes' file system state to CORRUPT from its original state GOOD.
         As a result these are the following rewards are activated:
-            File System State: corrupt_should_be_good = -10 * 2 (on Steps 1 = 3)
-            Hardware State: off_should_be_on = -10 * 2 (on Steps 4 - 6)
-            Service State: compromised_should_be_good = -20 * 2 (on Steps 7 - 9)
-            Software State: compromised_should_be_good = -20 * 2 (on Steps 10 - 12)
+            File System State: corrupt_should_be_good = -10 * 2 (on Steps 1 & 2)
+            Hardware State: off_should_be_on = -10 * 2 (on Steps 4 & 5)
+            Service State: compromised_should_be_good = -20 * 2 (on Steps 7 & 8)
+            Software State: compromised_should_be_good = -20 * 2 (on Steps 10 & 11)
 
             The Pattern of Life (PoLs) last for 2 steps, so the agent is penalised twice.
 
-    Note: This test run inherits conftest.py where the PrimAITE environment is ran and the blue agent is hard-coded
-    to do NOTHING on every step so we use Pattern of Lifes (PoLs) to change the nodes states and display that the agent
-    is being penalised on every step where the live network node differs from the network reference node.
+    Note: This test run inherits from conftest.py where the PrimAITE environment is run and the blue agent is hard-coded
+    to do NOTHING on every step.
+    We use Patterns of Life (PoLs) to change the nodes' states and display that the agent is being penalised on all
+    steps where the live network node differs from the network reference node.
 
     Total Reward: -10 + -10 + -10 + -10 + -20 + -20 + -20 + -20 = -120
     Step Count: 15