#1566 - Refactored the test_train_eval_episode_steps.py to use TempPrimaiteSession.

- Fixed all errors that were caused by fixing the above. - Some tests still fail; these are for SS to fix. - Dropped the old run_generic stuff from conftest.py
2023-07-07 15:50:14 +01:00
parent d49f73f139
commit 40381833d3
15 changed files with 69 additions and 279 deletions
--- a/src/primaite/agents/agent.py
+++ b/src/primaite/agents/agent.py
@@ -153,12 +153,11 @@ class AgentSessionABC(ABC):
            metadata_dict = json.load(file)

        metadata_dict["end_datetime"] = datetime.now().isoformat()
-
        if not self.is_eval:
-            metadata_dict["learning"]["total_episodes"] = self._env.episode_count  # noqa
+            metadata_dict["learning"]["total_episodes"] = self._env.actual_episode_count  # noqa
            metadata_dict["learning"]["total_time_steps"] = self._env.total_step_count  # noqa
        else:
-            metadata_dict["evaluation"]["total_episodes"] = self._env.episode_count  # noqa
+            metadata_dict["evaluation"]["total_episodes"] = self._env.actual_episode_count  # noqa
            metadata_dict["evaluation"]["total_time_steps"] = self._env.total_step_count  # noqa

        filepath = self.session_path / "session_metadata.json"
@@ -209,10 +208,11 @@ class AgentSessionABC(ABC):

        :param kwargs: Any agent-specific key-word args to be passed.
        """
-        self._env.set_as_eval()  # noqa
-        self.is_eval = True
-        self._plot_av_reward_per_episode(learning_session=False)
-        _LOGGER.info("Finished evaluation")
+        if self._can_evaluate:
+            self._plot_av_reward_per_episode(learning_session=False)
+            self._update_session_metadata_file()
+            self.is_eval = True
+            _LOGGER.info("Finished evaluation")

    @abstractmethod
    def _get_latest_checkpoint(self):
--- a/src/primaite/agents/rllib.py
+++ b/src/primaite/agents/rllib.py
@@ -85,8 +85,12 @@ class RLlibAgent(AgentSessionABC):
            metadata_dict = json.load(file)

        metadata_dict["end_datetime"] = datetime.now().isoformat()
-        metadata_dict["total_episodes"] = self._current_result["episodes_total"]
-        metadata_dict["total_time_steps"] = self._current_result["timesteps_total"]
+        if not self.is_eval:
+            metadata_dict["learning"]["total_episodes"] = self._current_result["episodes_total"]  # noqa
+            metadata_dict["learning"]["total_time_steps"] = self._current_result["timesteps_total"]  # noqa
+        else:
+            metadata_dict["evaluation"]["total_episodes"] = self._current_result["episodes_total"]  # noqa
+            metadata_dict["evaluation"]["total_time_steps"] = self._current_result["timesteps_total"]  # noqa

        filepath = self.session_path / "session_metadata.json"
        _LOGGER.debug(f"Updating Session Metadata file: {filepath}")
@@ -150,7 +154,6 @@ class RLlibAgent(AgentSessionABC):

        super().learn()

-
    def evaluate(
        self,
        **kwargs,
--- a/src/primaite/agents/sb3.py
+++ b/src/primaite/agents/sb3.py
@@ -58,7 +58,7 @@ class SB3Agent(AgentSessionABC):
            PPOMlp,
            self._env,
            verbose=self.sb3_output_verbose_level,
-            n_steps=self._training_config.num_eval_steps,
+            n_steps=self._training_config.num_train_steps,
            tensorboard_log=str(self._tensorboard_log_path),
            seed=self._training_config.seed,
        )
@@ -93,7 +93,7 @@ class SB3Agent(AgentSessionABC):
        for i in range(episodes):
            self._agent.learn(total_timesteps=time_steps)
            self._save_checkpoint()
-        self._env.reset()
+        self._env._write_av_reward_per_episode()  # noqa
        self.save()
        self._env.close()
        super().learn()
@@ -129,7 +129,7 @@ class SB3Agent(AgentSessionABC):
                if isinstance(action, np.ndarray):
                    action = np.int64(action)
                obs, rewards, done, info = self._env.step(action)
-        self._env.reset()
+        self._env._write_av_reward_per_episode()  # noqa
        self._env.close()
        super().evaluate()

--- a/src/primaite/config/_package_data/training/training_config_main.yaml
+++ b/src/primaite/config/_package_data/training/training_config_main.yaml
@@ -68,7 +68,7 @@ num_train_episodes: 10
 num_train_steps: 256

 # Number of episodes for evaluation to run per session
-num_eval_episodes: 10
+num_eval_episodes: 1

 # Number of time_steps for evaluation per episode
 num_eval_steps: 256
--- a/src/primaite/config/training_config.py
+++ b/src/primaite/config/training_config.py
@@ -66,7 +66,7 @@ class TrainingConfig:
    num_train_steps: int = 256
    "The number of steps in an episode during an training session"

-    num_eval_episodes: int = 10
+    num_eval_episodes: int = 1
    "The number of episodes to train over during an evaluation session"

    num_eval_steps: int = 256
@@ -242,10 +242,10 @@ class TrainingConfig:
            tc += f"{self.hard_coded_agent_view}, "
        tc += f"{self.action_type}, "
        tc += f"observation_space={self.observation_space}, "
-        if self.session_type.name == "TRAIN":
+        if self.session_type is SessionType.TRAIN:
            tc += f"{self.num_train_episodes} episodes @ "
            tc += f"{self.num_train_steps} steps"
-        elif self.session_type.name == "EVAL":
+        elif self.session_type is SessionType.EVAL:
            tc += f"{self.num_eval_episodes} episodes @ "
            tc += f"{self.num_eval_steps} steps"
        else:
--- a/src/primaite/environment/primaite_env.py
+++ b/src/primaite/environment/primaite_env.py
@@ -261,6 +261,11 @@ class Primaite(Env):
        self.total_step_count = 0
        self.episode_steps = self.training_config.num_eval_steps

+    def _write_av_reward_per_episode(self):
+        if self.actual_episode_count > 0:
+            csv_data = self.actual_episode_count, self.average_reward
+            self.episode_av_reward_writer.write(csv_data)
+
    def reset(self):
        """
        AI Gym Reset function.
@@ -268,10 +273,7 @@ class Primaite(Env):
        Returns:
             Environment observation space (reset)
        """
-        if self.actual_episode_count > 0:
-            csv_data = self.actual_episode_count, self.average_reward
-            self.episode_av_reward_writer.write(csv_data)
-
+        self._write_av_reward_per_episode()
        self.episode_count += 1

        # Don't need to reset links, as they are cleared and recalculated every
--- a/src/primaite/environment/reward.py
+++ b/src/primaite/environment/reward.py
@@ -90,7 +90,6 @@ def calculate_reward_function(
                        f"Penalty of {ier_reward} was NOT applied."
                    )
                )
-
    return reward_value


--- a/src/primaite/utils/session_output_reader.py
+++ b/src/primaite/utils/session_output_reader.py
@@ -16,5 +16,6 @@ def av_rewards_dict(av_rewards_csv_file: Union[str, Path]) -> Dict[int, float]:
    :param av_rewards_csv_file: The average rewards per episode csv file path.
    :return: The average rewards per episode cdv as a dict.
    """
-    d = pl.read_csv(av_rewards_csv_file).to_dict()
-    return {v: d["Average Reward"][i] for i, v in enumerate(d["Episode"])}
+    df = pl.read_csv(av_rewards_csv_file).to_dict()
+
+    return {v: df["Average Reward"][i] for i, v in enumerate(df["Episode"])}
--- a/tests/config/one_node_states_on_off_lay_down_config.yaml
+++ b/tests/config/one_node_states_on_off_lay_down_config.yaml
@@ -18,11 +18,6 @@
  - name: ftp
    port: '21'
    state: GOOD
- item_type: POSITION
-  positions:
-  - node: '1'
-    x_pos: 309
-    y_pos: 78
 - item_type: RED_POL
  id: '1'
  start_step: 1
--- a/tests/config/one_node_states_on_off_main_config.yaml
+++ b/tests/config/one_node_states_on_off_main_config.yaml
@@ -22,17 +22,13 @@ agent_identifier: DUMMY
 # "ACL"
 # "ANY" node and acl actions
 action_type: NODE
-# Number of episodes for training to run per session
-num_train_episodes: 10

-# Number of time_steps for training per episode
-num_train_steps: 256

 # Number of episodes for evaluation to run per session
-num_eval_episodes: 10
+num_eval_episodes: 1

 # Number of time_steps for evaluation per episode
-num_eval_steps: 256
+num_eval_steps: 15
 # Time delay between steps (for generic agents)
 time_delay: 1

--- a/tests/config/train_episode_step.yaml
+++ b/tests/config/train_episode_step.yaml
@@ -52,20 +52,20 @@ observation_space:


 # Number of episodes for training to run per session
-num_train_episodes: 30
+num_train_episodes: 3

 # Number of time_steps for training per episode
-num_train_steps: 1
+num_train_steps: 25

 # Number of episodes for evaluation to run per session
-num_eval_episodes: 10
+num_eval_episodes: 1

 # Number of time_steps for evaluation per episode
-num_eval_steps: 10
+num_eval_steps: 17

 # Sets how often the agent will save a checkpoint (every n time episodes).
 # Set to 0 if no checkpoints are required. Default is 10
-checkpoint_every_n_episodes: 10
+checkpoint_every_n_episodes: 0

 # Time delay (milliseconds) between steps for CUSTOM agents.
 time_delay: 5
@@ -74,7 +74,7 @@ time_delay: 5
 # "TRAIN" (Trains an agent)
 # "EVAL" (Evaluates an agent)
 # "TRAIN_EVAL" (Trains then evaluates an agent)
-session_type: EVAL
+session_type: TRAIN_EVAL

 # Environment config values
 # The high value for the observation space
--- a/tests/config/train_eval_check_episode_step.yaml
+++ b/tests/config/train_eval_check_episode_step.yaml
@@ -1,153 +0,0 @@
-# Training Config File
-
-# Sets which agent algorithm framework will be used.
-# Options are:
-# "SB3" (Stable Baselines3)
-# "RLLIB" (Ray RLlib)
-# "CUSTOM" (Custom Agent)
-agent_framework: SB3
-
-# Sets which deep learning framework will be used (by RLlib ONLY).
-# Default is TF (Tensorflow).
-# Options are:
-# "TF" (Tensorflow)
-# TF2 (Tensorflow 2.X)
-# TORCH (PyTorch)
-deep_learning_framework: TF2
-
-# Sets which Agent class will be used.
-# Options are:
-# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
-# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
-# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
-# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
-# "RANDOM" (primaite.agents.simple.RandomAgent)
-# "DUMMY" (primaite.agents.simple.DummyAgent)
-agent_identifier: PPO
-
-# Sets whether Red Agent POL and IER is randomised.
-# Options are:
-# True
-# False
-random_red_agent: False
-
-# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
-# Options are:
-# "BASIC" (The current observation space only)
-# "FULL" (Full environment view with actions taken and reward feedback)
-hard_coded_agent_view: FULL
-
-# Sets How the Action Space is defined:
-# "NODE"
-# "ACL"
-# "ANY" node and acl actions
-action_type: NODE
-# observation space
-observation_space:
-  # flatten: true
-  components:
-    - name: NODE_LINK_TABLE
-    # - name: NODE_STATUSES
-    # - name: LINK_TRAFFIC_LEVELS
-
-
-# Number of episodes for training to run per session
-num_train_episodes: 30
-
-# Number of time_steps for training per episode
-num_train_steps: 1
-
-# Number of episodes for evaluation to run per session
-num_eval_episodes: 10
-
-# Number of time_steps for evaluation per episode
-num_eval_steps: 10
-
-# Sets how often the agent will save a checkpoint (every n time episodes).
-# Set to 0 if no checkpoints are required. Default is 10
-checkpoint_every_n_episodes: 10
-
-# Time delay (milliseconds) between steps for CUSTOM agents.
-time_delay: 5
-
-# Type of session to be run. Options are:
-# "TRAIN" (Trains an agent)
-# "EVAL" (Evaluates an agent)
-# "TRAIN_EVAL" (Trains then evaluates an agent)
-session_type: TRAIN
-
-# Environment config values
-# The high value for the observation space
-observation_space_high_value: 1000000000
-
-# The Stable Baselines3 learn/eval output verbosity level:
-# Options are:
-# "NONE" (No Output)
-# "INFO" (Info Messages (such as devices and wrappers used))
-# "DEBUG" (All Messages)
-sb3_output_verbose_level: NONE
-
-# Reward values
-# Generic
-all_ok: 0
-# Node Hardware State
-off_should_be_on: -10
-off_should_be_resetting: -5
-on_should_be_off: -2
-on_should_be_resetting: -5
-resetting_should_be_on: -5
-resetting_should_be_off: -2
-resetting: -3
-# Node Software or Service State
-good_should_be_patching: 2
-good_should_be_compromised: 5
-good_should_be_overwhelmed: 5
-patching_should_be_good: -5
-patching_should_be_compromised: 2
-patching_should_be_overwhelmed: 2
-patching: -3
-compromised_should_be_good: -20
-compromised_should_be_patching: -20
-compromised_should_be_overwhelmed: -20
-compromised: -20
-overwhelmed_should_be_good: -20
-overwhelmed_should_be_patching: -20
-overwhelmed_should_be_compromised: -20
-overwhelmed: -20
-# Node File System State
-good_should_be_repairing: 2
-good_should_be_restoring: 2
-good_should_be_corrupt: 5
-good_should_be_destroyed: 10
-repairing_should_be_good: -5
-repairing_should_be_restoring: 2
-repairing_should_be_corrupt: 2
-repairing_should_be_destroyed: 0
-repairing: -3
-restoring_should_be_good: -10
-restoring_should_be_repairing: -2
-restoring_should_be_corrupt: 1
-restoring_should_be_destroyed: 2
-restoring: -6
-corrupt_should_be_good: -10
-corrupt_should_be_repairing: -10
-corrupt_should_be_restoring: -10
-corrupt_should_be_destroyed: 2
-corrupt: -10
-destroyed_should_be_good: -20
-destroyed_should_be_repairing: -20
-destroyed_should_be_restoring: -20
-destroyed_should_be_corrupt: -20
-destroyed: -20
-scanning: -2
-# IER status
-red_ier_running: -5
-green_ier_blocked: -10
-
-# Patching / Reset durations
-os_patching_duration: 5            # The time taken to patch the OS
-node_reset_duration: 5             # The time taken to reset a node (hardware)
-service_patching_duration: 5       # The time taken to patch a service
-file_system_repairing_limit: 5      # The time take to repair the file system
-file_system_restoring_limit: 5      # The time take to restore the file system
-file_system_scanning_limit: 5       # The time taken to scan the file system
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,17 +1,16 @@
 # Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
 import datetime
+import json
 import shutil
 import tempfile
-import time
 from datetime import datetime
 from pathlib import Path
-from typing import Dict, Union
+from typing import Any, Dict, Union
 from unittest.mock import patch

 import pytest

 from primaite import getLogger
-from primaite.common.enums import AgentIdentifier
 from primaite.environment.primaite_env import Primaite
 from primaite.primaite_session import PrimaiteSession
 from primaite.utils.session_output_reader import av_rewards_dict
@@ -48,6 +47,11 @@ class TempPrimaiteSession(PrimaiteSession):
        csv_file = f"average_reward_per_episode_{self.timestamp_str}.csv"
        return av_rewards_dict(self.evaluation_path / csv_file)

+    def metadata_file_as_dict(self) -> Dict[str, Any]:
+        """Read the session_metadata.json file and return as a dict."""
+        with open(self.session_path / "session_metadata.json", "r") as file:
+            return json.load(file)
+
    @property
    def env(self) -> Primaite:
        """Direct access to the env for ease of testing."""
@@ -58,6 +62,7 @@ class TempPrimaiteSession(PrimaiteSession):

    def __exit__(self, type, value, tb):
        shutil.rmtree(self.session_path)
+        shutil.rmtree(self.session_path.parent)
        _LOGGER.debug(f"Deleted temp session directory: {self.session_path}")


@@ -129,59 +134,3 @@ def temp_session_path() -> Path:
    session_path.mkdir(exist_ok=True, parents=True)

    return session_path
-
-
-def _get_primaite_env_from_config(
-    training_config_path: Union[str, Path],
-    lay_down_config_path: Union[str, Path],
-    temp_session_path,
-):
-    """Takes a config path and returns the created instance of Primaite."""
-    session_timestamp: datetime = datetime.now()
-    session_path = temp_session_path(session_timestamp)
-
-    timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
-    env = Primaite(
-        training_config_path=training_config_path,
-        lay_down_config_path=lay_down_config_path,
-        session_path=session_path,
-        timestamp_str=timestamp_str,
-    )
-    config_values = env.training_config
-    config_values.num_steps = env.episode_steps
-
-    # TOOD: This needs t be refactored to happen outside. Should be part of
-    # a main Session class.
-    if env.training_config.agent_identifier is AgentIdentifier.RANDOM:
-        run_generic(env, config_values)
-
-    return env
-
-
-def run_generic(env, config_values):
-    """Run against a generic agent."""
-    # Reset the environment at the start of the episode
-    # env.reset()
-    print(config_values.num_train_episodes, "how many episodes")
-    for episode in range(0, config_values.num_train_episodes):
-        for step in range(0, config_values.num_train_steps):
-            # Send the observation space to the agent to get an action
-            # TEMP - random action for now
-            # action = env.blue_agent_action(obs)
-            # action = env.action_space.sample()
-            action = 0
-
-            # Run the simulation step on the live environment
-            obs, reward, done, info = env.step(action)
-
-            # Break if done is True
-            if done:
-                break
-
-            # Introduce a delay between steps
-            time.sleep(config_values.time_delay / 1000)
-
-        # Reset the environment at the end of the episode
-        # env.reset()
-
-    # env.close()
--- a/tests/test_reward.py
+++ b/tests/test_reward.py
@@ -1,7 +1,10 @@
 import pytest

+from primaite import getLogger
 from tests import TEST_CONFIG_ROOT

+_LOGGER = getLogger(__name__)
+

@pytest.mark.parametrize(
    "temp_primaite_session",
@@ -44,7 +47,6 @@ def test_rewards_are_being_penalised_at_each_step_function(
        Average Reward: -8 (-120 / 15)
    """
    with temp_primaite_session as session:
-        session.evaluate()
        session.close()
        ev_rewards = session.eval_av_reward_per_episode_csv()
        assert ev_rewards[1] == -8.0
--- a/tests/test_train_eval_episode_steps.py
+++ b/tests/test_train_eval_episode_steps.py
@@ -3,7 +3,6 @@ import pytest
 from primaite import getLogger
 from primaite.config.lay_down_config import dos_very_basic_config_path
 from tests import TEST_CONFIG_ROOT
-from tests.conftest import run_generic

 _LOGGER = getLogger(__name__)

@@ -14,33 +13,30 @@ _LOGGER = getLogger(__name__)
    indirect=True,
 )
 def test_eval_steps_differ_from_training(temp_primaite_session):
-    """Uses PrimaiteSession class to compare number of episodes used for training and evaluation."""
-    with temp_primaite_session as train_session:
-        env = train_session.env
-        train_session.learn()
+    """Uses PrimaiteSession class to compare number of episodes used for training and evaluation.

-    """
    Train_episode_step.yaml main config:
-    num_train_steps = 1
-    num_eval_steps = 10
-
-    When the YAML file changes from TRAIN to EVAL the episode step changes and uses the other config value.
-
-    The test is showing that 10 steps are running for evaluation and NOT 1 step as EVAL has been selected in the config.
+        num_train_steps = 25
+        num_train_episodes = 3
+        num_eval_steps = 17
+        num_eval_episodes = 1
    """
-    assert env.episode_steps == 10  # 30
-    # assert env.actual_episode_count == 10 # should be 10
+    expected_learning_metadata = {"total_episodes": 3, "total_time_steps": 75}

+    expected_evaluation_metadata = {"total_episodes": 1, "total_time_steps": 17}
+    with temp_primaite_session as session:
+        # Run learning and check episode and step counts
+        session.learn()
+        assert session.env.actual_episode_count == expected_learning_metadata["total_episodes"]
+        assert session.env.total_step_count == expected_learning_metadata["total_time_steps"]

-@pytest.mark.parametrize(
-    "temp_primaite_session",
-    [[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]],
-    indirect=True,
-)
-def test_train_eval_config_option(temp_primaite_session):
-    """Uses PrimaiteSession class to test number of episodes and steps used for TRAIN and EVAL option."""
-    with temp_primaite_session as train_session:
-        env = train_session.env
-        run_generic(env, env.training_config)
+        # Run evaluation and check episode and step counts
+        session.evaluate()
+        assert session.env.actual_episode_count == expected_evaluation_metadata["total_episodes"]
+        assert session.env.total_step_count == expected_evaluation_metadata["total_time_steps"]

-    print(env.actual_episode_count, env.step_count, env.total_step_count)
+        # Load the session_metadata.json file and check that the both the
+        # learning and evaluation match what is expected above
+        metadata = session.metadata_file_as_dict()
+        assert metadata["learning"] == expected_learning_metadata
+        assert metadata["evaluation"] == expected_evaluation_metadata