From 3438ce7e09502cfa212dad97e22594ac62b17ff2 Mon Sep 17 00:00:00 2001
From: Chris McCarthy <chris.mccarthy@methods.co.uk>
Date: Thu, 6 Jul 2023 11:35:44 +0100
Subject: [PATCH] #1386 - Updated tests in
 test_seeding_and_deterministic_session.py to use TempPrimaiteSession. - Added
 test_seeded_learning test and test_deterministic_evaluation test. - Passed
 config values seed and deterministic to ppo agent - Dropped deterministic
 override in evaluate functions - TempPrimaiteSession now writes files to a
 UUID folder rather than datetime - Added seed to Ray RLlib agent setup in
 rllib.py - Added seed to SB3 agent setup in sb3.py

---
 src/primaite/agents/agent.py                  |   1 +
 src/primaite/agents/rllib.py                  |   1 +
 src/primaite/agents/sb3.py                    |  10 +-
 .../ppo_not_seeded_training_config.yaml       | 155 ++++++++++++++++++
 .../{e2e => }/ppo_seeded_training_config.yaml |   0
 tests/conftest.py                             |   1 -
 .../test_session_repeatability.py             |  57 -------
 tests/mock_and_patch/get_session_path_mock.py |   5 +-
 .../test_seeding_and_deterministic_session.py |  57 +++++++
 9 files changed, 222 insertions(+), 65 deletions(-)
 create mode 100644 tests/config/ppo_not_seeded_training_config.yaml
 rename tests/config/{e2e => }/ppo_seeded_training_config.yaml (100%)
 delete mode 100644 tests/e2e_integration_tests/test_session_repeatability.py
 create mode 100644 tests/test_seeding_and_deterministic_session.py

diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py
index 4b39839a..5b76c36b 100644
--- a/src/primaite/agents/agent.py
+++ b/src/primaite/agents/agent.py
@@ -248,6 +248,7 @@ class AgentSessionABC(ABC):
 
             agent.session_path = path
 
+
             return agent
 
         else:
diff --git a/src/primaite/agents/rllib.py b/src/primaite/agents/rllib.py
index 30edd93c..dcb1f5c5 100644
--- a/src/primaite/agents/rllib.py
+++ b/src/primaite/agents/rllib.py
@@ -106,6 +106,7 @@ class RLlibAgent(AgentSessionABC):
                 timestamp_str=self.timestamp_str,
             ),
         )
+        self._agent_config.seed = self._training_config.seed
 
         self._agent_config.training(train_batch_size=self._training_config.num_steps)
         self._agent_config.framework(framework="tf")
diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py
index 17fbe0a6..0d031c10 100644
--- a/src/primaite/agents/sb3.py
+++ b/src/primaite/agents/sb3.py
@@ -59,6 +59,7 @@ class SB3Agent(AgentSessionABC):
             verbose=self.sb3_output_verbose_level,
             n_steps=self._training_config.num_steps,
             tensorboard_log=str(self._tensorboard_log_path),
+            seed=self._training_config.seed
         )
 
     def _save_checkpoint(self):
@@ -98,20 +99,18 @@ class SB3Agent(AgentSessionABC):
 
     def evaluate(
         self,
-        deterministic: bool = True,
         **kwargs,
     ):
         """
         Evaluate the agent.
 
-        :param deterministic: Whether the evaluation is deterministic.
         :param kwargs: Any agent-specific key-word args to be passed.
         """
         time_steps = self._training_config.num_steps
         episodes = self._training_config.num_episodes
         self._env.set_as_eval()
         self.is_eval = True
-        if deterministic:
+        if self._training_config.deterministic:
             deterministic_str = "deterministic"
         else:
             deterministic_str = "non-deterministic"
@@ -122,7 +121,10 @@ class SB3Agent(AgentSessionABC):
             obs = self._env.reset()
 
             for step in range(time_steps):
-                action, _states = self._agent.predict(obs, deterministic=deterministic)
+                action, _states = self._agent.predict(
+                    obs,
+                    deterministic=self._training_config.deterministic
+                )
                 if isinstance(action, np.ndarray):
                     action = np.int64(action)
                 obs, rewards, done, info = self._env.step(action)
diff --git a/tests/config/ppo_not_seeded_training_config.yaml b/tests/config/ppo_not_seeded_training_config.yaml
new file mode 100644
index 00000000..f43c151c
--- /dev/null
+++ b/tests/config/ppo_not_seeded_training_config.yaml
@@ -0,0 +1,155 @@
+# Training Config File
+
+# Sets which agent algorithm framework will be used.
+# Options are:
+# "SB3" (Stable Baselines3)
+# "RLLIB" (Ray RLlib)
+# "CUSTOM" (Custom Agent)
+agent_framework: SB3
+
+# Sets which deep learning framework will be used (by RLlib ONLY).
+# Default is TF (Tensorflow).
+# Options are:
+# "TF" (Tensorflow)
+# TF2 (Tensorflow 2.X)
+# TORCH (PyTorch)
+deep_learning_framework: TF2
+
+# Sets which Agent class will be used.
+# Options are:
+# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
+# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
+# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
+# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
+# "RANDOM" (primaite.agents.simple.RandomAgent)
+# "DUMMY" (primaite.agents.simple.DummyAgent)
+agent_identifier: PPO
+
+# Sets whether Red Agent POL and IER is randomised.
+# Options are:
+# True
+# False
+random_red_agent: False
+
+# The (integer) seed to be used in random number generation
+# Default is None (null)
+seed: None
+
+# Set whether the agent will be deterministic instead of stochastic
+# Options are:
+# True
+# False
+deterministic: False
+
+# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
+# Options are:
+# "BASIC" (The current observation space only)
+# "FULL" (Full environment view with actions taken and reward feedback)
+hard_coded_agent_view: FULL
+
+# Sets How the Action Space is defined:
+# "NODE"
+# "ACL"
+# "ANY" node and acl actions
+action_type: NODE
+# observation space
+observation_space:
+  # flatten: true
+  components:
+    - name: NODE_LINK_TABLE
+    # - name: NODE_STATUSES
+    # - name: LINK_TRAFFIC_LEVELS
+# Number of episodes to run per session
+num_episodes: 10
+
+# Number of time_steps per episode
+num_steps: 256
+
+# Sets how often the agent will save a checkpoint (every n time episodes).
+# Set to 0 if no checkpoints are required. Default is 10
+checkpoint_every_n_episodes: 0
+
+# Time delay (milliseconds) between steps for CUSTOM agents.
+time_delay: 5
+
+# Type of session to be run. Options are:
+# "TRAIN" (Trains an agent)
+# "EVAL" (Evaluates an agent)
+# "TRAIN_EVAL" (Trains then evaluates an agent)
+session_type: TRAIN_EVAL
+
+# Environment config values
+# The high value for the observation space
+observation_space_high_value: 1000000000
+
+# The Stable Baselines3 learn/eval output verbosity level:
+# Options are:
+# "NONE" (No Output)
+# "INFO" (Info Messages (such as devices and wrappers used))
+# "DEBUG" (All Messages)
+sb3_output_verbose_level: NONE
+
+# Reward values
+# Generic
+all_ok: 0
+# Node Hardware State
+off_should_be_on: -10
+off_should_be_resetting: -5
+on_should_be_off: -2
+on_should_be_resetting: -5
+resetting_should_be_on: -5
+resetting_should_be_off: -2
+resetting: -3
+# Node Software or Service State
+good_should_be_patching: 2
+good_should_be_compromised: 5
+good_should_be_overwhelmed: 5
+patching_should_be_good: -5
+patching_should_be_compromised: 2
+patching_should_be_overwhelmed: 2
+patching: -3
+compromised_should_be_good: -20
+compromised_should_be_patching: -20
+compromised_should_be_overwhelmed: -20
+compromised: -20
+overwhelmed_should_be_good: -20
+overwhelmed_should_be_patching: -20
+overwhelmed_should_be_compromised: -20
+overwhelmed: -20
+# Node File System State
+good_should_be_repairing: 2
+good_should_be_restoring: 2
+good_should_be_corrupt: 5
+good_should_be_destroyed: 10
+repairing_should_be_good: -5
+repairing_should_be_restoring: 2
+repairing_should_be_corrupt: 2
+repairing_should_be_destroyed: 0
+repairing: -3
+restoring_should_be_good: -10
+restoring_should_be_repairing: -2
+restoring_should_be_corrupt: 1
+restoring_should_be_destroyed: 2
+restoring: -6
+corrupt_should_be_good: -10
+corrupt_should_be_repairing: -10
+corrupt_should_be_restoring: -10
+corrupt_should_be_destroyed: 2
+corrupt: -10
+destroyed_should_be_good: -20
+destroyed_should_be_repairing: -20
+destroyed_should_be_restoring: -20
+destroyed_should_be_corrupt: -20
+destroyed: -20
+scanning: -2
+# IER status
+red_ier_running: -5
+green_ier_blocked: -10
+
+# Patching / Reset durations
+os_patching_duration: 5            # The time taken to patch the OS
+node_reset_duration: 5             # The time taken to reset a node (hardware)
+service_patching_duration: 5       # The time taken to patch a service
+file_system_repairing_limit: 5      # The time take to repair the file system
+file_system_restoring_limit: 5      # The time take to restore the file system
+file_system_scanning_limit: 5       # The time taken to scan the file system
diff --git a/tests/config/e2e/ppo_seeded_training_config.yaml b/tests/config/ppo_seeded_training_config.yaml
similarity index 100%
rename from tests/config/e2e/ppo_seeded_training_config.yaml
rename to tests/config/ppo_seeded_training_config.yaml
diff --git a/tests/conftest.py b/tests/conftest.py
index af76b314..388bc034 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -58,7 +58,6 @@ class TempPrimaiteSession(PrimaiteSession):
 
     def __exit__(self, type, value, tb):
         shutil.rmtree(self.session_path)
-        shutil.rmtree(self.session_path.parent)
         _LOGGER.debug(f"Deleted temp session directory: {self.session_path}")
 
 
diff --git a/tests/e2e_integration_tests/test_session_repeatability.py b/tests/e2e_integration_tests/test_session_repeatability.py
deleted file mode 100644
index 99cb158d..00000000
--- a/tests/e2e_integration_tests/test_session_repeatability.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""
-Seed tests.
-
-These tests will train an agent.
-This agent is then loaded and evaluated twice,
-the 2 evaluation wuns should be the same.
-
-This proves that the seed works.
-"""
-import time
-
-from primaite.config.lay_down_config import dos_very_basic_config_path
-from primaite.primaite_session import PrimaiteSession
-from tests import TEST_CONFIG_ROOT
-
-
-def test_seeded_sessions():
-    """Test to see if seed works in multiple sessions."""
-    # ppo training session
-    ppo_train = PrimaiteSession(TEST_CONFIG_ROOT / "e2e/ppo_seeded_training_config.yaml", dos_very_basic_config_path())
-    # train agent
-    ppo_train.setup()
-    ppo_train.learn()
-    ppo_train.close()
-
-    # agent path to use for evaluation
-    path_prefix = f"{ppo_train._training_config.agent_framework}_{ppo_train._training_config.agent_identifier}"
-    agent_path = ppo_train.session_path / f"{path_prefix}_{ppo_train.timestamp_str}.zip"
-
-    ppo_session_1 = PrimaiteSession(
-        TEST_CONFIG_ROOT / "e2e/ppo_seeded_training_config.yaml", dos_very_basic_config_path()
-    )
-
-    # load trained agent
-    ppo_session_1._training_config.agent_load_file = agent_path
-    ppo_session_1.setup()
-    time.sleep(1)
-
-    ppo_session_2 = PrimaiteSession(
-        TEST_CONFIG_ROOT / "e2e/ppo_seeded_training_config.yaml", dos_very_basic_config_path()
-    )
-
-    # load trained agent
-    ppo_session_2._training_config.agent_load_file = agent_path
-    ppo_session_2.setup()
-
-    # run evaluation
-    ppo_session_1.evaluate()
-    ppo_session_1.close()
-    ppo_session_2.evaluate()
-    ppo_session_2.close()
-
-    # compare output
-    # assert compare_transaction_file(
-    #     ppo_session_1.evaluation_path / f"all_transactions_{ppo_session_1.timestamp_str}.csv",
-    #     ppo_session_2.evaluation_path / f"all_transactions_{ppo_session_2.timestamp_str}.csv"
-    # ) is True
diff --git a/tests/mock_and_patch/get_session_path_mock.py b/tests/mock_and_patch/get_session_path_mock.py
index feff52f6..90c0cb5d 100644
--- a/tests/mock_and_patch/get_session_path_mock.py
+++ b/tests/mock_and_patch/get_session_path_mock.py
@@ -1,6 +1,7 @@
 import tempfile
 from datetime import datetime
 from pathlib import Path
+from uuid import uuid4
 
 from primaite import getLogger
 
@@ -14,9 +15,7 @@ def get_temp_session_path(session_timestamp: datetime) -> Path:
     :param session_timestamp: This is the datetime that the session started.
     :return: The session directory path.
     """
-    date_dir = session_timestamp.strftime("%Y-%m-%d")
-    session_path = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
-    session_path = Path(tempfile.gettempdir()) / "primaite" / date_dir / session_path
+    session_path = Path(tempfile.gettempdir()) / "primaite" / str(uuid4())
     session_path.mkdir(exist_ok=True, parents=True)
     _LOGGER.debug(f"Created temp session directory: {session_path}")
     return session_path
diff --git a/tests/test_seeding_and_deterministic_session.py b/tests/test_seeding_and_deterministic_session.py
new file mode 100644
index 00000000..0e420459
--- /dev/null
+++ b/tests/test_seeding_and_deterministic_session.py
@@ -0,0 +1,57 @@
+import pytest as pytest
+
+from primaite.config.lay_down_config import dos_very_basic_config_path
+from tests import TEST_CONFIG_ROOT
+
+
+@pytest.mark.parametrize(
+    "temp_primaite_session",
+    [[
+        TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml",
+        dos_very_basic_config_path()
+    ]],
+    indirect=True,
+)
+def test_seeded_learning(temp_primaite_session):
+    """Test running seeded learning produces the same output when ran twice."""
+
+    expected_mean_reward_per_episode = {
+        1: -90.703125,
+        2: -91.15234375,
+        3: -87.5,
+        4: -92.2265625,
+        5: -94.6875,
+        6: -91.19140625,
+        7: -88.984375,
+        8: -88.3203125,
+        9: -112.79296875,
+        10: -100.01953125
+    }
+    with temp_primaite_session as session:
+        assert session._training_config.seed == 67890, \
+            "Expected output is based upon a agent that was trained with " \
+            "seed 67890"
+        session.learn()
+        actual_mean_reward_per_episode = session.learn_av_reward_per_episode()
+
+    assert actual_mean_reward_per_episode == expected_mean_reward_per_episode
+
+
+@pytest.mark.skip(reason="Inconsistent results. Needs someone with RL "
+                         "knowledge to investigate further.")
+@pytest.mark.parametrize(
+    "temp_primaite_session",
+    [[
+        TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml",
+        dos_very_basic_config_path()
+    ]],
+    indirect=True,
+)
+def test_deterministic_evaluation(temp_primaite_session):
+    """Test running deterministic evaluation gives same av eward per episode."""
+    with temp_primaite_session as session:
+        # do stuff
+        session.learn()
+        session.evaluate()
+        eval_mean_reward = session.eval_av_reward_per_episode_csv()
+        assert len(set(eval_mean_reward.values())) == 1