#1632 - Added bench marking script
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -147,3 +147,4 @@ docs/source/primaite-dependencies.rst
|
||||
|
||||
# outputs
|
||||
src/primaite/outputs/
|
||||
/benchmark/output/
|
||||
|
||||
163
benchmark/config/benchmark_training_config.yaml
Normal file
163
benchmark/config/benchmark_training_config.yaml
Normal file
@@ -0,0 +1,163 @@
|
||||
# Training Config File
|
||||
|
||||
# Sets which agent algorithm framework will be used.
|
||||
# Options are:
|
||||
# "SB3" (Stable Baselines3)
|
||||
# "RLLIB" (Ray RLlib)
|
||||
# "CUSTOM" (Custom Agent)
|
||||
agent_framework: SB3
|
||||
|
||||
# Sets which deep learning framework will be used (by RLlib ONLY).
|
||||
# Default is TF (Tensorflow).
|
||||
# Options are:
|
||||
# "TF" (Tensorflow)
|
||||
# TF2 (Tensorflow 2.X)
|
||||
# TORCH (PyTorch)
|
||||
deep_learning_framework: TF2
|
||||
|
||||
# Sets which Agent class will be used.
|
||||
# Options are:
|
||||
# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
|
||||
# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
|
||||
# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
|
||||
# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
|
||||
# "RANDOM" (primaite.agents.simple.RandomAgent)
|
||||
# "DUMMY" (primaite.agents.simple.DummyAgent)
|
||||
agent_identifier: PPO
|
||||
|
||||
# Sets whether Red Agent POL and IER is randomised.
|
||||
# Options are:
|
||||
# True
|
||||
# False
|
||||
random_red_agent: False
|
||||
|
||||
# The (integer) seed to be used in random number generation
|
||||
# Default is None (null)
|
||||
seed: null
|
||||
|
||||
# Set whether the agent will be deterministic instead of stochastic
|
||||
# Options are:
|
||||
# True
|
||||
# False
|
||||
deterministic: False
|
||||
|
||||
# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
|
||||
# Options are:
|
||||
# "BASIC" (The current observation space only)
|
||||
# "FULL" (Full environment view with actions taken and reward feedback)
|
||||
hard_coded_agent_view: FULL
|
||||
|
||||
# Sets How the Action Space is defined:
|
||||
# "NODE"
|
||||
# "ACL"
|
||||
# "ANY" node and acl actions
|
||||
action_type: ANY
|
||||
# observation space
|
||||
observation_space:
|
||||
flatten: true
|
||||
components:
|
||||
- name: NODE_LINK_TABLE
|
||||
- name: NODE_STATUSES
|
||||
- name: LINK_TRAFFIC_LEVELS
|
||||
|
||||
|
||||
# Number of episodes for training to run per session
|
||||
num_train_episodes: 10
|
||||
|
||||
# Number of time_steps for training per episode
|
||||
num_train_steps: 256
|
||||
|
||||
# Number of episodes for evaluation to run per session
|
||||
num_eval_episodes: 1
|
||||
|
||||
# Number of time_steps for evaluation per episode
|
||||
num_eval_steps: 256
|
||||
|
||||
# Sets how often the agent will save a checkpoint (every n time episodes).
|
||||
# Set to 0 if no checkpoints are required. Default is 10
|
||||
checkpoint_every_n_episodes: 0
|
||||
|
||||
# Time delay (milliseconds) between steps for CUSTOM agents.
|
||||
time_delay: 5
|
||||
|
||||
# Type of session to be run. Options are:
|
||||
# "TRAIN" (Trains an agent)
|
||||
# "EVAL" (Evaluates an agent)
|
||||
# "TRAIN_EVAL" (Trains then evaluates an agent)
|
||||
session_type: TRAIN
|
||||
|
||||
# Environment config values
|
||||
# The high value for the observation space
|
||||
observation_space_high_value: 1000000000
|
||||
|
||||
# The Stable Baselines3 learn/eval output verbosity level:
|
||||
# Options are:
|
||||
# "NONE" (No Output)
|
||||
# "INFO" (Info Messages (such as devices and wrappers used))
|
||||
# "DEBUG" (All Messages)
|
||||
sb3_output_verbose_level: NONE
|
||||
|
||||
# Reward values
|
||||
# Generic
|
||||
all_ok: 0
|
||||
# Node Hardware State
|
||||
off_should_be_on: -0.001
|
||||
off_should_be_resetting: -0.0005
|
||||
on_should_be_off: -0.0002
|
||||
on_should_be_resetting: -0.0005
|
||||
resetting_should_be_on: -0.0005
|
||||
resetting_should_be_off: -0.0002
|
||||
resetting: -0.0003
|
||||
# Node Software or Service State
|
||||
good_should_be_patching: 0.0002
|
||||
good_should_be_compromised: 0.0005
|
||||
good_should_be_overwhelmed: 0.0005
|
||||
patching_should_be_good: -0.0005
|
||||
patching_should_be_compromised: 0.0002
|
||||
patching_should_be_overwhelmed: 0.0002
|
||||
patching: -0.0003
|
||||
compromised_should_be_good: -0.002
|
||||
compromised_should_be_patching: -0.002
|
||||
compromised_should_be_overwhelmed: -0.002
|
||||
compromised: -0.002
|
||||
overwhelmed_should_be_good: -0.002
|
||||
overwhelmed_should_be_patching: -0.002
|
||||
overwhelmed_should_be_compromised: -0.002
|
||||
overwhelmed: -0.002
|
||||
# Node File System State
|
||||
good_should_be_repairing: 0.0002
|
||||
good_should_be_restoring: 0.0002
|
||||
good_should_be_corrupt: 0.0005
|
||||
good_should_be_destroyed: 0.001
|
||||
repairing_should_be_good: -0.0005
|
||||
repairing_should_be_restoring: 0.0002
|
||||
repairing_should_be_corrupt: 0.0002
|
||||
repairing_should_be_destroyed: 0.0000
|
||||
repairing: -0.0003
|
||||
restoring_should_be_good: -0.001
|
||||
restoring_should_be_repairing: -0.0002
|
||||
restoring_should_be_corrupt: 0.0001
|
||||
restoring_should_be_destroyed: 0.0002
|
||||
restoring: -0.0006
|
||||
corrupt_should_be_good: -0.001
|
||||
corrupt_should_be_repairing: -0.001
|
||||
corrupt_should_be_restoring: -0.001
|
||||
corrupt_should_be_destroyed: 0.0002
|
||||
corrupt: -0.001
|
||||
destroyed_should_be_good: -0.002
|
||||
destroyed_should_be_repairing: -0.002
|
||||
destroyed_should_be_restoring: -0.002
|
||||
destroyed_should_be_corrupt: -0.002
|
||||
destroyed: -0.002
|
||||
scanning: -0.0002
|
||||
# IER status
|
||||
red_ier_running: -0.0005
|
||||
green_ier_blocked: -0.001
|
||||
|
||||
# Patching / Reset durations
|
||||
os_patching_duration: 5 # The time taken to patch the OS
|
||||
node_reset_duration: 5 # The time taken to reset a node (hardware)
|
||||
service_patching_duration: 5 # The time taken to patch a service
|
||||
file_system_repairing_limit: 5 # The time take to repair the file system
|
||||
file_system_restoring_limit: 5 # The time take to restore the file system
|
||||
file_system_scanning_limit: 5 # The time taken to scan the file system
|
||||
122
benchmark/primaite_benchmark.py
Normal file
122
benchmark/primaite_benchmark.py
Normal file
@@ -0,0 +1,122 @@
|
||||
import json
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Final, Tuple
|
||||
from unittest.mock import patch
|
||||
|
||||
import primaite
|
||||
from primaite.config.lay_down_config import data_manipulation_config_path
|
||||
from tests.conftest import TempPrimaiteSession
|
||||
|
||||
_LOGGER = primaite.getLogger(__name__)
|
||||
|
||||
_RESULTS_ROOT: Final[Path] = Path(__file__).parent / "results"
|
||||
_RESULTS_ROOT.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
_OUTPUT_ROOT: Final[Path] = Path(__file__).parent / "output"
|
||||
# Clear and recreate the output directory
|
||||
shutil.rmtree(_OUTPUT_ROOT)
|
||||
_OUTPUT_ROOT.mkdir()
|
||||
|
||||
|
||||
class BenchmarkPrimaiteSession(TempPrimaiteSession):
|
||||
"""A benchmarking primaite session."""
|
||||
|
||||
def _learn_benchmark_durations(self) -> Tuple[float, float, float]:
|
||||
"""
|
||||
Calculate and return the learning benchmark durations.
|
||||
|
||||
Calculates the:
|
||||
- Total learning time in seconds
|
||||
- Total learning time per time step in seconds
|
||||
- Total learning time per 100 time steps per 10 nodes in seconds
|
||||
|
||||
:return: The learning benchmark durations as a Tuple of three floats:
|
||||
Tuple[total_s, s_per_step, s_per_100_steps_10_nodes].
|
||||
"""
|
||||
data = self.metadata_file_as_dict()
|
||||
start_dt = datetime.fromisoformat(data["start_datetime"])
|
||||
end_dt = datetime.fromisoformat(data["end_datetime"])
|
||||
delta = end_dt - start_dt
|
||||
total_s = delta.total_seconds()
|
||||
|
||||
total_steps = data["learning"]["total_time_steps"]
|
||||
s_per_step = total_s / total_steps
|
||||
|
||||
num_nodes = self.env.num_nodes
|
||||
num_intervals = total_steps / 100
|
||||
av_interval_time = total_s / num_intervals
|
||||
s_per_100_steps_10_nodes = av_interval_time / (num_nodes / 10)
|
||||
|
||||
return total_s, s_per_step, s_per_100_steps_10_nodes
|
||||
|
||||
def learn_metadata_dict(self) -> Dict[str, Any]:
|
||||
"""Metadata specific to the learning session."""
|
||||
total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations()
|
||||
return {
|
||||
"total_episodes": self.env.actual_episode_count,
|
||||
"total_time_steps": self.env.total_step_count,
|
||||
"total_s": total_s,
|
||||
"s_per_step": s_per_step,
|
||||
"s_per_100_steps_10_nodes": s_per_100_steps_10_nodes,
|
||||
"av_reward_per_episode": self.learn_av_reward_per_episode_dict(),
|
||||
}
|
||||
|
||||
|
||||
def _get_benchmark_session_path(session_timestamp: datetime) -> Path:
|
||||
return _OUTPUT_ROOT / session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
|
||||
|
||||
|
||||
def _get_benchmark_primaite_session() -> BenchmarkPrimaiteSession:
|
||||
with patch("primaite.agents.agent_abc.get_session_path", _get_benchmark_session_path) as mck:
|
||||
mck.session_timestamp = datetime.now()
|
||||
path = Path(__file__).parent / "config/benchmark_training_config.yaml"
|
||||
return BenchmarkPrimaiteSession(path, data_manipulation_config_path())
|
||||
|
||||
|
||||
def _summarise_metadata_dict_results(data: Dict) -> Dict:
|
||||
n = len(data)
|
||||
averaged_data = {
|
||||
"total_sessions": n,
|
||||
"total_episodes": sum(d["total_episodes"] for d in data.values()),
|
||||
"total_time_steps": sum(d["total_time_steps"] for d in data.values()),
|
||||
"av_s_per_session": sum(d["total_s"] for d in data.values()) / n,
|
||||
"av_s_per_step": sum(d["s_per_step"] for d in data.values()) / n,
|
||||
"av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in data.values()) / n,
|
||||
"av_reward_per_episode": {},
|
||||
}
|
||||
|
||||
av_reward_per_episode_keys = data[1]["av_reward_per_episode"].keys()
|
||||
|
||||
for episode_key in av_reward_per_episode_keys:
|
||||
averaged_data["av_reward_per_episode"][episode_key] = (
|
||||
sum(data[k]["av_reward_per_episode"][episode_key] for k in data.keys()) / n
|
||||
)
|
||||
|
||||
return averaged_data
|
||||
|
||||
|
||||
def run():
|
||||
"""Run the PrimAITE benchmark."""
|
||||
av_reward_per_episode_dicts = {}
|
||||
for i in range(1, 11):
|
||||
print(f"starting Benchmark Session: {i}")
|
||||
with _get_benchmark_primaite_session() as session:
|
||||
session.learn()
|
||||
av_reward_per_episode_dicts[i] = session.learn_metadata_dict()
|
||||
|
||||
benchmark_metadata = _summarise_metadata_dict_results(av_reward_per_episode_dicts)
|
||||
v_str = f"v{primaite.__version__}".strip()
|
||||
|
||||
version_result_dir = _RESULTS_ROOT / v_str
|
||||
if version_result_dir.exists():
|
||||
shutil.rmtree(version_result_dir)
|
||||
version_result_dir.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file:
|
||||
json.dump(benchmark_metadata, file, indent=4)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
||||
Reference in New Issue
Block a user