From 7054b775ffb6bb10c5572c216c0458f2bb90ad7d Mon Sep 17 00:00:00 2001
From: Chris McCarthy
Date: Tue, 18 Jul 2023 10:11:01 +0100
Subject: [PATCH] #1632 - Added benchmarking script

---
 .gitignore                                      |   1 +
 .../config/benchmark_training_config.yaml       | 163 ++++++++++++++++++
 benchmark/primaite_benchmark.py                 | 122 +++++++++++++
 3 files changed, 286 insertions(+)
 create mode 100644 benchmark/config/benchmark_training_config.yaml
 create mode 100644 benchmark/primaite_benchmark.py

diff --git a/.gitignore b/.gitignore
index ef1050e6..3f3bbf05 100644
--- a/.gitignore
+++ b/.gitignore
@@ -147,3 +147,4 @@ docs/source/primaite-dependencies.rst
 # outputs
 src/primaite/outputs/
+/benchmark/output/
diff --git a/benchmark/config/benchmark_training_config.yaml b/benchmark/config/benchmark_training_config.yaml
new file mode 100644
index 00000000..a194d1c6
--- /dev/null
+++ b/benchmark/config/benchmark_training_config.yaml
@@ -0,0 +1,163 @@
# Training Config File

# Sets which agent algorithm framework will be used.
# Options are:
# "SB3" (Stable Baselines3)
# "RLLIB" (Ray RLlib)
# "CUSTOM" (Custom Agent)
agent_framework: SB3

# Sets which deep learning framework will be used (by RLlib ONLY).
# Default is TF (Tensorflow).
# Options are:
# "TF" (Tensorflow)
# "TF2" (Tensorflow 2.X)
# "TORCH" (PyTorch)
deep_learning_framework: TF2

# Sets which Agent class will be used.
# Options are:
# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
# "RANDOM" (primaite.agents.simple.RandomAgent)
# "DUMMY" (primaite.agents.simple.DummyAgent)
agent_identifier: PPO

# Sets whether Red Agent POL and IER is randomised.
# Options are:
# True
# False
random_red_agent: False

# The (integer) seed to be used in random number generation
# Default is None (null)
seed: null

# Set whether the agent will be deterministic instead of stochastic
# Options are:
# True
# False
deterministic: False

# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
# Options are:
# "BASIC" (The current observation space only)
# "FULL" (Full environment view with actions taken and reward feedback)
hard_coded_agent_view: FULL

# Sets how the Action Space is defined:
# "NODE"
# "ACL"
# "ANY" (node and acl actions)
action_type: ANY

# observation space
observation_space:
  flatten: true
  components:
    - name: NODE_LINK_TABLE
    - name: NODE_STATUSES
    - name: LINK_TRAFFIC_LEVELS


# Number of episodes for training to run per session
num_train_episodes: 10

# Number of time_steps for training per episode
num_train_steps: 256

# Number of episodes for evaluation to run per session
num_eval_episodes: 1

# Number of time_steps for evaluation per episode
num_eval_steps: 256

# Sets how often the agent will save a checkpoint (every n episodes).
# Set to 0 if no checkpoints are required. Default is 10
checkpoint_every_n_episodes: 0

# Time delay (milliseconds) between steps for CUSTOM agents.
time_delay: 5

# Type of session to be run.
# Options are:
# "TRAIN" (Trains an agent)
# "EVAL" (Evaluates an agent)
# "TRAIN_EVAL" (Trains then evaluates an agent)
session_type: TRAIN

# Environment config values
# The high value for the observation space
observation_space_high_value: 1000000000

# The Stable Baselines3 learn/eval output verbosity level:
# Options are:
# "NONE" (No Output)
# "INFO" (Info Messages (such as devices and wrappers used))
# "DEBUG" (All Messages)
sb3_output_verbose_level: NONE

# Reward values
# Generic
all_ok: 0
# Node Hardware State
off_should_be_on: -0.001
off_should_be_resetting: -0.0005
on_should_be_off: -0.0002
on_should_be_resetting: -0.0005
resetting_should_be_on: -0.0005
resetting_should_be_off: -0.0002
resetting: -0.0003
# Node Software or Service State
good_should_be_patching: 0.0002
good_should_be_compromised: 0.0005
good_should_be_overwhelmed: 0.0005
patching_should_be_good: -0.0005
patching_should_be_compromised: 0.0002
patching_should_be_overwhelmed: 0.0002
patching: -0.0003
compromised_should_be_good: -0.002
compromised_should_be_patching: -0.002
compromised_should_be_overwhelmed: -0.002
compromised: -0.002
overwhelmed_should_be_good: -0.002
overwhelmed_should_be_patching: -0.002
overwhelmed_should_be_compromised: -0.002
overwhelmed: -0.002
# Node File System State
good_should_be_repairing: 0.0002
good_should_be_restoring: 0.0002
good_should_be_corrupt: 0.0005
good_should_be_destroyed: 0.001
repairing_should_be_good: -0.0005
repairing_should_be_restoring: 0.0002
repairing_should_be_corrupt: 0.0002
repairing_should_be_destroyed: 0.0000
repairing: -0.0003
restoring_should_be_good: -0.001
restoring_should_be_repairing: -0.0002
restoring_should_be_corrupt: 0.0001
restoring_should_be_destroyed: 0.0002
restoring: -0.0006
corrupt_should_be_good: -0.001
corrupt_should_be_repairing: -0.001
corrupt_should_be_restoring: -0.001
corrupt_should_be_destroyed: 0.0002
corrupt: -0.001
destroyed_should_be_good: -0.002
+destroyed_should_be_repairing: -0.002 +destroyed_should_be_restoring: -0.002 +destroyed_should_be_corrupt: -0.002 +destroyed: -0.002 +scanning: -0.0002 +# IER status +red_ier_running: -0.0005 +green_ier_blocked: -0.001 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time take to repair the file system +file_system_restoring_limit: 5 # The time take to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/benchmark/primaite_benchmark.py b/benchmark/primaite_benchmark.py new file mode 100644 index 00000000..3c6055d0 --- /dev/null +++ b/benchmark/primaite_benchmark.py @@ -0,0 +1,122 @@ +import json +import shutil +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, Final, Tuple +from unittest.mock import patch + +import primaite +from primaite.config.lay_down_config import data_manipulation_config_path +from tests.conftest import TempPrimaiteSession + +_LOGGER = primaite.getLogger(__name__) + +_RESULTS_ROOT: Final[Path] = Path(__file__).parent / "results" +_RESULTS_ROOT.mkdir(exist_ok=True, parents=True) + +_OUTPUT_ROOT: Final[Path] = Path(__file__).parent / "output" +# Clear and recreate the output directory +shutil.rmtree(_OUTPUT_ROOT) +_OUTPUT_ROOT.mkdir() + + +class BenchmarkPrimaiteSession(TempPrimaiteSession): + """A benchmarking primaite session.""" + + def _learn_benchmark_durations(self) -> Tuple[float, float, float]: + """ + Calculate and return the learning benchmark durations. 
+ + Calculates the: + - Total learning time in seconds + - Total learning time per time step in seconds + - Total learning time per 100 time steps per 10 nodes in seconds + + :return: The learning benchmark durations as a Tuple of three floats: + Tuple[total_s, s_per_step, s_per_100_steps_10_nodes]. + """ + data = self.metadata_file_as_dict() + start_dt = datetime.fromisoformat(data["start_datetime"]) + end_dt = datetime.fromisoformat(data["end_datetime"]) + delta = end_dt - start_dt + total_s = delta.total_seconds() + + total_steps = data["learning"]["total_time_steps"] + s_per_step = total_s / total_steps + + num_nodes = self.env.num_nodes + num_intervals = total_steps / 100 + av_interval_time = total_s / num_intervals + s_per_100_steps_10_nodes = av_interval_time / (num_nodes / 10) + + return total_s, s_per_step, s_per_100_steps_10_nodes + + def learn_metadata_dict(self) -> Dict[str, Any]: + """Metadata specific to the learning session.""" + total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations() + return { + "total_episodes": self.env.actual_episode_count, + "total_time_steps": self.env.total_step_count, + "total_s": total_s, + "s_per_step": s_per_step, + "s_per_100_steps_10_nodes": s_per_100_steps_10_nodes, + "av_reward_per_episode": self.learn_av_reward_per_episode_dict(), + } + + +def _get_benchmark_session_path(session_timestamp: datetime) -> Path: + return _OUTPUT_ROOT / session_timestamp.strftime("%Y-%m-%d_%H-%M-%S") + + +def _get_benchmark_primaite_session() -> BenchmarkPrimaiteSession: + with patch("primaite.agents.agent_abc.get_session_path", _get_benchmark_session_path) as mck: + mck.session_timestamp = datetime.now() + path = Path(__file__).parent / "config/benchmark_training_config.yaml" + return BenchmarkPrimaiteSession(path, data_manipulation_config_path()) + + +def _summarise_metadata_dict_results(data: Dict) -> Dict: + n = len(data) + averaged_data = { + "total_sessions": n, + "total_episodes": sum(d["total_episodes"] 
for d in data.values()), + "total_time_steps": sum(d["total_time_steps"] for d in data.values()), + "av_s_per_session": sum(d["total_s"] for d in data.values()) / n, + "av_s_per_step": sum(d["s_per_step"] for d in data.values()) / n, + "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in data.values()) / n, + "av_reward_per_episode": {}, + } + + av_reward_per_episode_keys = data[1]["av_reward_per_episode"].keys() + + for episode_key in av_reward_per_episode_keys: + averaged_data["av_reward_per_episode"][episode_key] = ( + sum(data[k]["av_reward_per_episode"][episode_key] for k in data.keys()) / n + ) + + return averaged_data + + +def run(): + """Run the PrimAITE benchmark.""" + av_reward_per_episode_dicts = {} + for i in range(1, 11): + print(f"starting Benchmark Session: {i}") + with _get_benchmark_primaite_session() as session: + session.learn() + av_reward_per_episode_dicts[i] = session.learn_metadata_dict() + + benchmark_metadata = _summarise_metadata_dict_results(av_reward_per_episode_dicts) + v_str = f"v{primaite.__version__}".strip() + + version_result_dir = _RESULTS_ROOT / v_str + if version_result_dir.exists(): + shutil.rmtree(version_result_dir) + version_result_dir.mkdir(exist_ok=True, parents=True) + + with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file: + json.dump(benchmark_metadata, file, indent=4) + + +if __name__ == "__main__": + run()