Refactored the benchmarking process

2024-06-07 19:59:55 +01:00
parent 3bad9aa51e
commit 4068231547
13 changed files with 77 additions and 12731 deletions
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -0,0 +1,21 @@
+from typing import Any, Dict, Optional, Tuple
+
+from gymnasium.core import ObsType
+
+from primaite.session.environment import PrimaiteGymEnv
+
+
+class BenchmarkPrimaiteGymEnv(PrimaiteGymEnv):
+    """
+    Class that extends the PrimaiteGymEnv.
+
+    The reset method is extended so that the total number of time steps across episodes is accumulated.
+    """
+
+    total_time_steps: int = 0
+
+    def reset(self, seed: Optional[int] = None) -> Tuple[ObsType, Dict[str, Any]]:
+        """Overrides the PrimaiteGymEnv reset so that the total number of time steps is accumulated."""
+        self.total_time_steps += self.game.step_counter
+
+        return super().reset(seed=seed)
--- a/benchmark/primaite_benchmark.py
+++ b/benchmark/primaite_benchmark.py
@@ -1,27 +1,27 @@
 # © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
+import json
 import shutil
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Final, Tuple

+from report import build_benchmark_latex_report
 from stable_baselines3 import PPO

 import primaite
-from benchmark.utils.benchmark import BenchmarkPrimaiteGymEnv
-from benchmark.utils.report import build_benchmark_latex_report
+from benchmark import BenchmarkPrimaiteGymEnv
 from primaite.config.load import data_manipulation_config_path

 _LOGGER = primaite.getLogger(__name__)

-_BENCHMARK_ROOT = Path(__file__).parent
-_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results"
-_RESULTS_ROOT.mkdir(exist_ok=True, parents=True)
+_MAJOR_V = primaite.__version__.split(".")[0]

-_OUTPUT_ROOT: Final[Path] = _BENCHMARK_ROOT / "output"
-# Clear and recreate the output directory
-if _OUTPUT_ROOT.exists():
-    shutil.rmtree(_OUTPUT_ROOT)
-_OUTPUT_ROOT.mkdir()
+_BENCHMARK_ROOT = Path(__file__).parent
+_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results" / f"v{_MAJOR_V}"
+_VERSION_ROOT: Final[Path] = _RESULTS_ROOT / f"v{primaite.__version__}"
+_SESSION_METADATA_ROOT: Final[Path] = _VERSION_ROOT / "session_metadata"
+
+_SESSION_METADATA_ROOT.mkdir(parents=True, exist_ok=True)


 class BenchmarkSession:
@@ -33,7 +33,10 @@ class BenchmarkSession:
    num_episodes: int
    """Number of episodes to run the training session."""

-    num_steps: int
+    episode_len: int
+    """The number of steps per episode."""
+
+    total_steps: int
    """Number of steps to run the training session."""

    batch_size: int
@@ -48,16 +51,21 @@ class BenchmarkSession:
    end_time: datetime
    """End time for the session."""

-    session_metadata: Dict
-    """Dict containing the metadata for the session - used to generate benchmark report."""
-
    def __init__(
-        self, gym_env: BenchmarkPrimaiteGymEnv, num_episodes: int, num_steps: int, batch_size: int, learning_rate: float
+        self,
+        gym_env: BenchmarkPrimaiteGymEnv,
+        episode_len: int,
+        num_episodes: int,
+        n_steps: int,
+        batch_size: int,
+        learning_rate: float,
    ):
        """Initialise the BenchmarkSession."""
        self.gym_env = gym_env
+        self.episode_len = episode_len
+        self.n_steps = n_steps
        self.num_episodes = num_episodes
-        self.num_steps = num_steps
+        self.total_steps = self.num_episodes * self.episode_len
        self.batch_size = batch_size
        self.learning_rate = learning_rate

@@ -65,15 +73,16 @@ class BenchmarkSession:
        """Run the training session."""
        # start timer for session
        self.start_time = datetime.now()
-
        model = PPO(
            policy="MlpPolicy",
            env=self.gym_env,
            learning_rate=self.learning_rate,
-            n_steps=self.num_steps * self.num_episodes,
-            batch_size=self.num_steps * self.num_episodes,
+            n_steps=self.n_steps,
+            batch_size=self.batch_size,
+            verbose=0,
+            tensorboard_log="./PPO_UC2/",
        )
-        model.learn(total_timesteps=self.num_episodes * self.num_steps)
+        model.learn(total_timesteps=self.total_steps)

        # end timer for session
        self.end_time = datetime.now()
@@ -142,12 +151,13 @@ def _prepare_session_directory():


 def run(
-    number_of_sessions: int = 10,
-    num_episodes: int = 1000,
-    num_timesteps: int = 128,
-    batch_size: int = 1280,
+    number_of_sessions: int = 2,
+    num_episodes: int = 5,
+    episode_len: int = 128,
+    n_steps: int = 1280,
+    batch_size: int = 32,
    learning_rate: float = 3e-4,
-) -> None:  # 10  # 1000  # 256
+) -> None:
    """Run the PrimAITE benchmark."""
    benchmark_start_time = datetime.now()

@@ -163,13 +173,20 @@ def run(
            session = BenchmarkSession(
                gym_env=gym_env,
                num_episodes=num_episodes,
-                num_steps=num_timesteps,
+                n_steps=n_steps,
+                episode_len=episode_len,
                batch_size=batch_size,
                learning_rate=learning_rate,
            )
            session.train()
-            session_metadata_dict[i] = session.session_metadata

+            # Dump the session metadata so that we're not holding it in memory as it's large
+            with open(_SESSION_METADATA_ROOT / f"{i}.json", "w") as file:
+                json.dump(session.session_metadata, file, indent=4)
+
+    for i in range(1, number_of_sessions + 1):
+        with open(_SESSION_METADATA_ROOT / f"{i}.json", "r") as file:
+            session_metadata_dict[i] = json.load(file)
    # generate report
    build_benchmark_latex_report(
        benchmark_start_time=benchmark_start_time,
--- a/benchmark/utils/report.py
+++ b/benchmark/utils/report.py
@@ -1,6 +1,5 @@
 # © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
 import json
-import shutil
 import sys
 from datetime import datetime
 from pathlib import Path
@@ -14,9 +13,9 @@ from pylatex import Command, Document
 from pylatex import Figure as LatexFigure
 from pylatex import Section, Subsection, Tabular
 from pylatex.utils import bold
+from utils import _get_system_info

 import primaite
-from benchmark.utils.utils import _get_system_info

 PLOT_CONFIG = {
    "size": {"auto_size": False, "width": 1500, "height": 900},
@@ -124,7 +123,7 @@ def _plot_benchmark_metadata(
            "title": "Episode",
            "type": "linear",
        },
-        yaxis={"title": "Average Reward"},
+        yaxis={"title": "Total Reward"},
        title=title,
    )

@@ -140,7 +139,8 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
    benchmarked. The combined_av_reward_per_episode is extracted from each,
    converted into a polars dataframe, and plotted as a scatter line in plotly.
    """
-    title = "PrimAITE Versions Learning Benchmark"
+    major_v = primaite.__version__.split(".")[0]
+    title = f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*"
    subtitle = "Rolling Av (Combined Session Av)"
    if title:
        if subtitle:
@@ -172,7 +172,7 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
            "title": "Episode",
            "type": "linear",
        },
-        yaxis={"title": "Average Reward"},
+        yaxis={"title": "Total Reward"},
        title=title,
    )
    fig["data"][0]["showlegend"] = True
@@ -188,8 +188,6 @@ def build_benchmark_latex_report(
    v_str = f"v{primaite.__version__}"

    version_result_dir = results_root_path / v_str
-    if version_result_dir.exists():
-        shutil.rmtree(version_result_dir)
    version_result_dir.mkdir(exist_ok=True, parents=True)

    # load the config file as dict
@@ -200,7 +198,7 @@ def build_benchmark_latex_report(
    benchmark_metadata_dict = _build_benchmark_results_dict(
        start_datetime=benchmark_start_time, metadata_dict=session_metadata, config=cfg_data
    )
-
+    major_v = primaite.__version__.split(".")[0]
    with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file:
        json.dump(benchmark_metadata_dict, file, indent=4)
    title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark"
@@ -241,9 +239,9 @@ def build_benchmark_latex_report(
            f"with each episode consisting of {steps} steps."
        )
        doc.append(
-            f"\nThe mean reward per episode from each session is captured. This is then used to calculate a "
-            f"combined average reward per episode from the {sessions} individual sessions for smoothing. "
-            f"Finally, a 25-widow rolling average of the combined average reward per session is calculated for "
+            f"\nThe total reward per episode from each session is captured. This is then used to calculate an "
+            f"average total reward per episode from the {sessions} individual sessions for smoothing. "
+            f"Finally, a 25-window rolling average of the average total reward per session is calculated for "
            f"further smoothing."
        )

@@ -294,14 +292,14 @@ def build_benchmark_latex_report(
                    table.add_hline()

    with doc.create(Section("Graphs")):
-        with doc.create(Subsection(f"PrimAITE {primaite_version} Learning Benchmark Plot")):
+        with doc.create(Subsection(f"v{primaite_version} Learning Benchmark Plot")):
            with doc.create(LatexFigure(position="h!")) as pic:
                pic.add_image(str(this_version_plot_path))
                pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot")

-        with doc.create(Subsection("PrimAITE All Versions Learning Benchmark Plot")):
+        with doc.create(Subsection(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")):
            with doc.create(LatexFigure(position="h!")) as pic:
                pic.add_image(str(all_version_plot_path))
-                pic.add_caption("PrimAITE All Versions Learning Benchmark Plot")
+                pic.add_caption(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")

    doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True)
--- a/benchmark/results/PrimAITE
+++ b/benchmark/results/PrimAITE
--- a/benchmark/results/v2/PrimAITE
+++ b/benchmark/results/v2/PrimAITE
--- a/benchmark/results/v2/v2.0.0/PrimAITE
+++ b/benchmark/results/v2/v2.0.0/PrimAITE
--- a/benchmark/results/v2/v2.0.0/PrimAITE
+++ b/benchmark/results/v2/v2.0.0/PrimAITE
--- a/benchmark/results/v2/v2.0.0/v2.0.0_benchmark_metadata.json
+++ b/benchmark/results/v2/v2.0.0/v2.0.0_benchmark_metadata.json
--- a/benchmark/results/v3.0.0/PrimAITE
+++ b/benchmark/results/v3.0.0/PrimAITE
--- a/benchmark/results/v3.0.0/PrimAITE
+++ b/benchmark/results/v3.0.0/PrimAITE
@@ -1,122 +0,0 @@
-\documentclass{article}%
-\usepackage[T1]{fontenc}%
-\usepackage[utf8]{inputenc}%
-\usepackage{lmodern}%
-\usepackage{textcomp}%
-\usepackage{lastpage}%
-\usepackage{geometry}%
-\geometry{tmargin=2.5cm,rmargin=2.5cm,bmargin=2.5cm,lmargin=2.5cm}%
-\usepackage{graphicx}%
-%
-\title{PrimAITE 3.0.0 Learning Benchmark}%
-\author{PrimAITE Dev Team}%
-\date{2024{-}06{-}01}%
-%
-\begin{document}%
-\normalsize%
-\maketitle%
-\section{Introduction}%
-\label{sec:Introduction}%
-PrimAITE v3.0.0 was benchmarked automatically upon release. Learning rate metrics were captured to be referenced during system{-}level testing and user acceptance testing (UAT).%
-\newline%
-The benchmarking process consists of running 10 training session using the same config file. Each session trains an agent for 1000 episodes, with each episode consisting of 128 steps.%
-\newline%
-The mean reward per episode from each session is captured. This is then used to calculate a combined average reward per episode from the 10 individual sessions for smoothing. Finally, a 25{-}widow rolling average of the combined average reward per session is calculated for further smoothing.
-
-%
-\section{System Information}%
-\label{sec:SystemInformation}%
-\subsection{Python}%
-\label{subsec:Python}%
-\begin{tabular}{|l|l|}%
-\hline%
-\textbf{Version}&3.8.10 (tags/v3.8.10:3d8993a, May  3 2021, 11:48:03) {[}MSC v.1928 64 bit (AMD64){]}\\%
-\hline%
-\end{tabular}
-
-%
-\subsection{System}%
-\label{subsec:System}%
-\begin{tabular}{|l|l|}%
-\hline%
-\textbf{OS}&Windows\\%
-\hline%
-\textbf{OS Version}&10.0.19045\\%
-\hline%
-\textbf{Machine}&AMD64\\%
-\hline%
-\textbf{Processor}&Intel64 Family 6 Model 85 Stepping 4, GenuineIntel\\%
-\hline%
-\end{tabular}
-
-%
-\subsection{CPU}%
-\label{subsec:CPU}%
-\begin{tabular}{|l|l|}%
-\hline%
-\textbf{Physical Cores}&6\\%
-\hline%
-\textbf{Total Cores}&12\\%
-\hline%
-\textbf{Max Frequency}&3600.00Mhz\\%
-\hline%
-\end{tabular}
-
-%
-\subsection{Memory}%
-\label{subsec:Memory}%
-\begin{tabular}{|l|l|}%
-\hline%
-\textbf{Total}&63.52GB\\%
-\hline%
-\textbf{Swap Total}&9.50GB\\%
-\hline%
-\end{tabular}
-
-%
-\section{Stats}%
-\label{sec:Stats}%
-\subsection{Benchmark Results}%
-\label{subsec:BenchmarkResults}%
-\begin{tabular}{|l|l|}%
-\hline%
-\textbf{Total Sessions}&10\\%
-\hline%
-\textbf{Total Episodes}&10010\\%
-\hline%
-\textbf{Total Steps}&1280000\\%
-\hline%
-\textbf{Av Session Duration (s)}&1569.8775\\%
-\hline%
-\textbf{Av Step Duration (s)}&0.0012\\%
-\hline%
-\textbf{Av Duration per 100 Steps per 10 Nodes (s)}&0.1226\\%
-\hline%
-\end{tabular}
-
-%
-\section{Graphs}%
-\label{sec:Graphs}%
-\subsection{PrimAITE 3.0.0 Learning Benchmark Plot}%
-\label{subsec:PrimAITE3.0.0LearningBenchmarkPlot}%
-
-
-\begin{figure}[h!]%
-\centering%
-\includegraphics[width=0.8\textwidth]{D:/Projects/ARCD/PrimAITE/PrimAITE/benchmark/results/v3.0.0/PrimAITE v3.0.0 Learning Benchmark.png}%
-\caption{PrimAITE 3.0.0 Learning Benchmark Plot}%
-\end{figure}
-
-%
-\subsection{PrimAITE All Versions Learning Benchmark Plot}%
-\label{subsec:PrimAITEAllVersionsLearningBenchmarkPlot}%
-
-
-\begin{figure}[h!]%
-\centering%
-\includegraphics[width=0.8\textwidth]{D:/Projects/ARCD/PrimAITE/PrimAITE/benchmark/results/PrimAITE Versions Learning Benchmark.png}%
-\caption{PrimAITE All Versions Learning Benchmark Plot}%
-\end{figure}
-
-%
-\end{document}
--- a/benchmark/results/v3.0.0/v3.0.0_benchmark_metadata.json
+++ b/benchmark/results/v3.0.0/v3.0.0_benchmark_metadata.json
--- a/benchmark/utils/utils.py
+++ b/benchmark/utils/utils.py
--- a/benchmark/utils/benchmark.py
+++ b/benchmark/utils/benchmark.py
@@ -1,122 +0,0 @@
-from datetime import datetime
-from typing import Any, Dict, List, Optional, Tuple
-
-from gymnasium.core import ObsType
-
-from primaite.session.environment import PrimaiteGymEnv
-
-
-class BenchmarkPrimaiteGymEnv(PrimaiteGymEnv):
-    """
-    Class that extends the PrimaiteGymEnv.
-
-    The reset method is extended so that the average rewards per episode are recorded.
-    """
-
-    total_time_steps: int = 0
-
-    def reset(self, seed: Optional[int] = None) -> Tuple[ObsType, Dict[str, Any]]:
-        """Overrides the PrimAITEGymEnv reset so that the total timesteps is saved."""
-        self.total_time_steps += self.game.step_counter
-
-        return super().reset(seed=seed)
-
-
-#####################################
-# IGNORE BELOW FOR NOW
-#####################################
-
-
-class BenchMarkOSInfo:
-    """Operating System Information about the machine that run the benchmark."""
-
-    operating_system: str
-    """The operating system the benchmark was run on."""
-
-    operating_system_version: str
-    """The operating system version the benchmark was run on."""
-
-    machine: str
-    """The type of machine running the benchmark."""
-
-    processor: str
-    """The processor used to run the benchmark."""
-
-
-class BenchMarkCPUInfo:
-    """CPU Information of the machine that ran the benchmark."""
-
-    physical_cores: int
-    """The number of CPU cores the machine that ran the benchmark had."""
-
-    total_cores: int
-    """The number of total cores the machine that run the benchmark had."""
-
-    max_frequency: int
-    """The CPU's maximum clock speed."""
-
-
-class BenchMarkMemoryInfo:
-    """The Memory Information of the machine that ran the benchmark."""
-
-    total: str
-    """The total amount of memory."""
-
-    swap_total: str
-    """Virtual memory."""
-
-
-class BenchMarkGPUInfo:
-    """The GPU Information of the machine that ran the benchmark."""
-
-    name: str
-    """GPU name."""
-
-    total_memory: str
-    """GPU memory."""
-
-
-class BenchMarkSystemInfo:
-    """Overall system information of the machine that ran the benchmark."""
-
-    system: BenchMarkOSInfo
-    cpu: BenchMarkCPUInfo
-    memory: BenchMarkMemoryInfo
-    gpu: List[BenchMarkMemoryInfo]
-
-
-class BenchMarkResult:
-    """Class containing the relevant benchmark results."""
-
-    benchmark_start_time: datetime
-    """Start time of the benchmark run."""
-
-    benchmark_end_time: datetime
-    """End time of the benchmark run."""
-
-    primaite_version: str
-    """The version of PrimAITE being benchmarked."""
-
-    system_info: BenchMarkSystemInfo
-    """System information of the machine that ran the benchmark."""
-
-    total_sessions: int
-    """The number of sessions that the benchmark ran."""
-
-    total_episodes: int
-    """The number of episodes over all the sessions that the benchmark ran."""
-
-    total_timesteps: int
-    """The number of timesteps over all the sessions that the benchmark ran."""
-
-    average_seconds_per_session: float
-    """The average time per session."""
-
-    average_seconds_per_step: float
-    """The average time per step."""
-
-    average_seconds_per_100_steps_and_10_nodes: float
-    """The average time per 100 steps on a 10 node network."""
-
-    combined_average_reward_per_episode: Dict
-    """tbd."""