From 7054b775ffb6bb10c5572c216c0458f2bb90ad7d Mon Sep 17 00:00:00 2001
From: Chris McCarthy
Date: Tue, 18 Jul 2023 10:11:01 +0100
Subject: [PATCH] #1632 - Added benchmarking script

---
 .gitignore                                      |   1 +
 .../config/benchmark_training_config.yaml       | 163 ++++++++++++++++++
 benchmark/primaite_benchmark.py                 | 122 +++++++++++++
 3 files changed, 286 insertions(+)
 create mode 100644 benchmark/config/benchmark_training_config.yaml
 create mode 100644 benchmark/primaite_benchmark.py

diff --git a/.gitignore b/.gitignore
index ef1050e6..3f3bbf05 100644
--- a/.gitignore
+++ b/.gitignore
@@ -147,3 +147,4 @@ docs/source/primaite-dependencies.rst
 # outputs
 src/primaite/outputs/
+/benchmark/output/
diff --git a/benchmark/config/benchmark_training_config.yaml b/benchmark/config/benchmark_training_config.yaml
new file mode 100644
index 00000000..a194d1c6
--- /dev/null
+++ b/benchmark/config/benchmark_training_config.yaml
@@ -0,0 +1,163 @@
# Training Config File

# Sets which agent algorithm framework will be used.
# Options are:
# "SB3" (Stable Baselines3)
# "RLLIB" (Ray RLlib)
# "CUSTOM" (Custom Agent)
agent_framework: SB3

# Sets which deep learning framework will be used (by RLlib ONLY).
# Default is TF (Tensorflow).
# Options are:
# "TF" (Tensorflow)
# "TF2" (Tensorflow 2.X)
# "TORCH" (PyTorch)
deep_learning_framework: TF2

# Sets which Agent class will be used.
# Options are:
# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
# "RANDOM" (primaite.agents.simple.RandomAgent)
# "DUMMY" (primaite.agents.simple.DummyAgent)
agent_identifier: PPO

# Sets whether Red Agent POL and IER is randomised.
# Options are:
# True
# False
random_red_agent: False

# The (integer) seed to be used in random number generation
# Default is None (null)
seed: null

# Set whether the agent will be deterministic instead of stochastic
# Options are:
# True
# False
deterministic: False

# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
# Options are:
# "BASIC" (The current observation space only)
# "FULL" (Full environment view with actions taken and reward feedback)
hard_coded_agent_view: FULL

# Sets how the Action Space is defined:
# "NODE"
# "ACL"
# "ANY" (node and acl actions)
action_type: ANY

# observation space
observation_space:
  flatten: true
  components:
    - name: NODE_LINK_TABLE
    - name: NODE_STATUSES
    - name: LINK_TRAFFIC_LEVELS


# Number of episodes for training to run per session
num_train_episodes: 10

# Number of time_steps for training per episode
num_train_steps: 256

# Number of episodes for evaluation to run per session
num_eval_episodes: 1

# Number of time_steps for evaluation per episode
num_eval_steps: 256

# Sets how often the agent will save a checkpoint (every n episodes).
# Set to 0 if no checkpoints are required. Default is 10
checkpoint_every_n_episodes: 0

# Time delay (milliseconds) between steps for CUSTOM agents.
time_delay: 5

# Type of session to be run.
# Options are:
# "TRAIN" (Trains an agent)
# "EVAL" (Evaluates an agent)
# "TRAIN_EVAL" (Trains then evaluates an agent)
session_type: TRAIN

# Environment config values
# The high value for the observation space
observation_space_high_value: 1000000000

# The Stable Baselines3 learn/eval output verbosity level:
# Options are:
# "NONE" (No Output)
# "INFO" (Info Messages (such as devices and wrappers used))
# "DEBUG" (All Messages)
sb3_output_verbose_level: NONE

# Reward values
# Generic
all_ok: 0
# Node Hardware State
off_should_be_on: -0.001
off_should_be_resetting: -0.0005
on_should_be_off: -0.0002
on_should_be_resetting: -0.0005
resetting_should_be_on: -0.0005
resetting_should_be_off: -0.0002
resetting: -0.0003
# Node Software or Service State
good_should_be_patching: 0.0002
good_should_be_compromised: 0.0005
good_should_be_overwhelmed: 0.0005
patching_should_be_good: -0.0005
patching_should_be_compromised: 0.0002
patching_should_be_overwhelmed: 0.0002
patching: -0.0003
compromised_should_be_good: -0.002
compromised_should_be_patching: -0.002
compromised_should_be_overwhelmed: -0.002
compromised: -0.002
overwhelmed_should_be_good: -0.002
overwhelmed_should_be_patching: -0.002
overwhelmed_should_be_compromised: -0.002
overwhelmed: -0.002
# Node File System State
good_should_be_repairing: 0.0002
good_should_be_restoring: 0.0002
good_should_be_corrupt: 0.0005
good_should_be_destroyed: 0.001
repairing_should_be_good: -0.0005
repairing_should_be_restoring: 0.0002
repairing_should_be_corrupt: 0.0002
repairing_should_be_destroyed: 0.0000
repairing: -0.0003
restoring_should_be_good: -0.001
restoring_should_be_repairing: -0.0002
restoring_should_be_corrupt: 0.0001
restoring_should_be_destroyed: 0.0002
restoring: -0.0006
corrupt_should_be_good: -0.001
corrupt_should_be_repairing: -0.001
corrupt_should_be_restoring: -0.001
corrupt_should_be_destroyed: 0.0002
corrupt: -0.001
destroyed_should_be_good: -0.002
+destroyed_should_be_repairing: -0.002 +destroyed_should_be_restoring: -0.002 +destroyed_should_be_corrupt: -0.002 +destroyed: -0.002 +scanning: -0.0002 +# IER status +red_ier_running: -0.0005 +green_ier_blocked: -0.001 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/benchmark/primaite_benchmark.py b/benchmark/primaite_benchmark.py new file mode 100644 index 00000000..3c6055d0 --- /dev/null +++ b/benchmark/primaite_benchmark.py @@ -0,0 +1,122 @@ +import json +import shutil +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, Final, Tuple +from unittest.mock import patch + +import primaite +from primaite.config.lay_down_config import data_manipulation_config_path +from tests.conftest import TempPrimaiteSession + +_LOGGER = primaite.getLogger(__name__) + +_RESULTS_ROOT: Final[Path] = Path(__file__).parent / "results" +_RESULTS_ROOT.mkdir(exist_ok=True, parents=True) + +_OUTPUT_ROOT: Final[Path] = Path(__file__).parent / "output" +# Clear and recreate the output directory +shutil.rmtree(_OUTPUT_ROOT) +_OUTPUT_ROOT.mkdir() + + +class BenchmarkPrimaiteSession(TempPrimaiteSession): + """A benchmarking primaite session.""" + + def _learn_benchmark_durations(self) -> Tuple[float, float, float]: + """ + Calculate and return the learning benchmark durations. 
+ + Calculates the: + - Total learning time in seconds + - Total learning time per time step in seconds + - Total learning time per 100 time steps per 10 nodes in seconds + + :return: The learning benchmark durations as a Tuple of three floats: + Tuple[total_s, s_per_step, s_per_100_steps_10_nodes]. + """ + data = self.metadata_file_as_dict() + start_dt = datetime.fromisoformat(data["start_datetime"]) + end_dt = datetime.fromisoformat(data["end_datetime"]) + delta = end_dt - start_dt + total_s = delta.total_seconds() + + total_steps = data["learning"]["total_time_steps"] + s_per_step = total_s / total_steps + + num_nodes = self.env.num_nodes + num_intervals = total_steps / 100 + av_interval_time = total_s / num_intervals + s_per_100_steps_10_nodes = av_interval_time / (num_nodes / 10) + + return total_s, s_per_step, s_per_100_steps_10_nodes + + def learn_metadata_dict(self) -> Dict[str, Any]: + """Metadata specific to the learning session.""" + total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations() + return { + "total_episodes": self.env.actual_episode_count, + "total_time_steps": self.env.total_step_count, + "total_s": total_s, + "s_per_step": s_per_step, + "s_per_100_steps_10_nodes": s_per_100_steps_10_nodes, + "av_reward_per_episode": self.learn_av_reward_per_episode_dict(), + } + + +def _get_benchmark_session_path(session_timestamp: datetime) -> Path: + return _OUTPUT_ROOT / session_timestamp.strftime("%Y-%m-%d_%H-%M-%S") + + +def _get_benchmark_primaite_session() -> BenchmarkPrimaiteSession: + with patch("primaite.agents.agent_abc.get_session_path", _get_benchmark_session_path) as mck: + mck.session_timestamp = datetime.now() + path = Path(__file__).parent / "config/benchmark_training_config.yaml" + return BenchmarkPrimaiteSession(path, data_manipulation_config_path()) + + +def _summarise_metadata_dict_results(data: Dict) -> Dict: + n = len(data) + averaged_data = { + "total_sessions": n, + "total_episodes": sum(d["total_episodes"] 
for d in data.values()), + "total_time_steps": sum(d["total_time_steps"] for d in data.values()), + "av_s_per_session": sum(d["total_s"] for d in data.values()) / n, + "av_s_per_step": sum(d["s_per_step"] for d in data.values()) / n, + "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in data.values()) / n, + "av_reward_per_episode": {}, + } + + av_reward_per_episode_keys = data[1]["av_reward_per_episode"].keys() + + for episode_key in av_reward_per_episode_keys: + averaged_data["av_reward_per_episode"][episode_key] = ( + sum(data[k]["av_reward_per_episode"][episode_key] for k in data.keys()) / n + ) + + return averaged_data + + +def run(): + """Run the PrimAITE benchmark.""" + av_reward_per_episode_dicts = {} + for i in range(1, 11): + print(f"starting Benchmark Session: {i}") + with _get_benchmark_primaite_session() as session: + session.learn() + av_reward_per_episode_dicts[i] = session.learn_metadata_dict() + + benchmark_metadata = _summarise_metadata_dict_results(av_reward_per_episode_dicts) + v_str = f"v{primaite.__version__}".strip() + + version_result_dir = _RESULTS_ROOT / v_str + if version_result_dir.exists(): + shutil.rmtree(version_result_dir) + version_result_dir.mkdir(exist_ok=True, parents=True) + + with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file: + json.dump(benchmark_metadata, file, indent=4) + + +if __name__ == "__main__": + run()