Refactored the benchmarking process

This commit is contained in:
Chris McCarthy
2024-06-07 19:59:55 +01:00
parent 3bad9aa51e
commit 4068231547
13 changed files with 77 additions and 12731 deletions

21
benchmark/benchmark.py Normal file
View File

@@ -0,0 +1,21 @@
from typing import Any, Dict, Optional, Tuple
from gymnasium.core import ObsType
from primaite.session.environment import PrimaiteGymEnv
class BenchmarkPrimaiteGymEnv(PrimaiteGymEnv):
    """
    Class that extends the PrimaiteGymEnv.

    The reset method is extended so that the cumulative number of simulation
    timesteps across all episodes is recorded in ``total_time_steps``.
    NOTE(review): the original docstring claimed "average rewards per episode
    are recorded", but this class only tracks timesteps — confirm intent.
    """

    # Running total of simulation steps accumulated across all completed episodes.
    total_time_steps: int = 0

    def reset(self, seed: Optional[int] = None) -> Tuple[ObsType, Dict[str, Any]]:
        """Overrides the PrimAITEGymEnv reset so that the total timesteps is saved.

        Adds the step counter of the episode that just finished (read from
        ``self.game.step_counter``) onto ``total_time_steps`` before delegating
        to the parent ``reset``.

        :param seed: Optional RNG seed forwarded to the parent environment.
        :return: The ``(observation, info)`` tuple returned by the parent reset.
        """
        self.total_time_steps += self.game.step_counter
        return super().reset(seed=seed)

View File

@@ -1,27 +1,27 @@
# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
import json
import shutil
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Final, Tuple
from report import build_benchmark_latex_report
from stable_baselines3 import PPO
import primaite
from benchmark.utils.benchmark import BenchmarkPrimaiteGymEnv
from benchmark.utils.report import build_benchmark_latex_report
from benchmark import BenchmarkPrimaiteGymEnv
from primaite.config.load import data_manipulation_config_path
_LOGGER = primaite.getLogger(__name__)
_BENCHMARK_ROOT = Path(__file__).parent
_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results"
_RESULTS_ROOT.mkdir(exist_ok=True, parents=True)
_MAJOR_V = primaite.__version__.split(".")[0]
_OUTPUT_ROOT: Final[Path] = _BENCHMARK_ROOT / "output"
# Clear and recreate the output directory
if _OUTPUT_ROOT.exists():
shutil.rmtree(_OUTPUT_ROOT)
_OUTPUT_ROOT.mkdir()
_BENCHMARK_ROOT = Path(__file__).parent
_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results" / f"v{_MAJOR_V}"
_VERSION_ROOT: Final[Path] = _RESULTS_ROOT / f"v{primaite.__version__}"
_SESSION_METADATA_ROOT: Final[Path] = _VERSION_ROOT / "session_metadata"
_SESSION_METADATA_ROOT.mkdir(parents=True, exist_ok=True)
class BenchmarkSession:
@@ -33,7 +33,10 @@ class BenchmarkSession:
num_episodes: int
"""Number of episodes to run the training session."""
num_steps: int
episode_len: int
"""The number of steps per episode."""
total_steps: int
"""Number of steps to run the training session."""
batch_size: int
@@ -48,16 +51,21 @@ class BenchmarkSession:
end_time: datetime
"""End time for the session."""
session_metadata: Dict
"""Dict containing the metadata for the session - used to generate benchmark report."""
def __init__(
self, gym_env: BenchmarkPrimaiteGymEnv, num_episodes: int, num_steps: int, batch_size: int, learning_rate: float
self,
gym_env: BenchmarkPrimaiteGymEnv,
episode_len: int,
num_episodes: int,
n_steps: int,
batch_size: int,
learning_rate: float,
):
"""Initialise the BenchmarkSession."""
self.gym_env = gym_env
self.episode_len = episode_len
self.n_steps = n_steps
self.num_episodes = num_episodes
self.num_steps = num_steps
self.total_steps = self.num_episodes * self.episode_len
self.batch_size = batch_size
self.learning_rate = learning_rate
@@ -65,15 +73,16 @@ class BenchmarkSession:
"""Run the training session."""
# start timer for session
self.start_time = datetime.now()
model = PPO(
policy="MlpPolicy",
env=self.gym_env,
learning_rate=self.learning_rate,
n_steps=self.num_steps * self.num_episodes,
batch_size=self.num_steps * self.num_episodes,
n_steps=self.n_steps,
batch_size=self.batch_size,
verbose=0,
tensorboard_log="./PPO_UC2/",
)
model.learn(total_timesteps=self.num_episodes * self.num_steps)
model.learn(total_timesteps=self.total_steps)
# end timer for session
self.end_time = datetime.now()
@@ -142,12 +151,13 @@ def _prepare_session_directory():
def run(
number_of_sessions: int = 10,
num_episodes: int = 1000,
num_timesteps: int = 128,
batch_size: int = 1280,
number_of_sessions: int = 2,
num_episodes: int = 5,
episode_len: int = 128,
n_steps: int = 1280,
batch_size: int = 32,
learning_rate: float = 3e-4,
) -> None: # 10 # 1000 # 256
) -> None:
"""Run the PrimAITE benchmark."""
benchmark_start_time = datetime.now()
@@ -163,13 +173,20 @@ def run(
session = BenchmarkSession(
gym_env=gym_env,
num_episodes=num_episodes,
num_steps=num_timesteps,
n_steps=n_steps,
episode_len=episode_len,
batch_size=batch_size,
learning_rate=learning_rate,
)
session.train()
session_metadata_dict[i] = session.session_metadata
# Dump the session metadata so that we're not holding it in memory as it's large
with open(_SESSION_METADATA_ROOT / f"{i}.json", "w") as file:
json.dump(session.session_metadata, file, indent=4)
for i in range(1, number_of_sessions + 1):
with open(_SESSION_METADATA_ROOT / f"{i}.json", "r") as file:
session_metadata_dict[i] = json.load(file)
# generate report
build_benchmark_latex_report(
benchmark_start_time=benchmark_start_time,

View File

@@ -1,6 +1,5 @@
# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
import json
import shutil
import sys
from datetime import datetime
from pathlib import Path
@@ -14,9 +13,9 @@ from pylatex import Command, Document
from pylatex import Figure as LatexFigure
from pylatex import Section, Subsection, Tabular
from pylatex.utils import bold
from utils import _get_system_info
import primaite
from benchmark.utils.utils import _get_system_info
PLOT_CONFIG = {
"size": {"auto_size": False, "width": 1500, "height": 900},
@@ -124,7 +123,7 @@ def _plot_benchmark_metadata(
"title": "Episode",
"type": "linear",
},
yaxis={"title": "Average Reward"},
yaxis={"title": "Total Reward"},
title=title,
)
@@ -140,7 +139,8 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
benchmarked. The combined_av_reward_per_episode is extracted from each,
converted into a polars dataframe, and plotted as a scatter line in plotly.
"""
title = "PrimAITE Versions Learning Benchmark"
major_v = primaite.__version__.split(".")[0]
title = f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*"
subtitle = "Rolling Av (Combined Session Av)"
if title:
if subtitle:
@@ -172,7 +172,7 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
"title": "Episode",
"type": "linear",
},
yaxis={"title": "Average Reward"},
yaxis={"title": "Total Reward"},
title=title,
)
fig["data"][0]["showlegend"] = True
@@ -188,8 +188,6 @@ def build_benchmark_latex_report(
v_str = f"v{primaite.__version__}"
version_result_dir = results_root_path / v_str
if version_result_dir.exists():
shutil.rmtree(version_result_dir)
version_result_dir.mkdir(exist_ok=True, parents=True)
# load the config file as dict
@@ -200,7 +198,7 @@ def build_benchmark_latex_report(
benchmark_metadata_dict = _build_benchmark_results_dict(
start_datetime=benchmark_start_time, metadata_dict=session_metadata, config=cfg_data
)
major_v = primaite.__version__.split(".")[0]
with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file:
json.dump(benchmark_metadata_dict, file, indent=4)
title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark"
@@ -241,9 +239,9 @@ def build_benchmark_latex_report(
f"with each episode consisting of {steps} steps."
)
doc.append(
f"\nThe mean reward per episode from each session is captured. This is then used to calculate a "
f"combined average reward per episode from the {sessions} individual sessions for smoothing. "
f"Finally, a 25-widow rolling average of the combined average reward per session is calculated for "
f"\nThe total reward per episode from each session is captured. This is then used to calculate an "
f"average total reward per episode from the {sessions} individual sessions for smoothing. "
f"Finally, a 25-window rolling average of the average total reward per session is calculated for "
f"further smoothing."
)
@@ -294,14 +292,14 @@ def build_benchmark_latex_report(
table.add_hline()
with doc.create(Section("Graphs")):
with doc.create(Subsection(f"PrimAITE {primaite_version} Learning Benchmark Plot")):
with doc.create(Subsection(f"v{primaite_version} Learning Benchmark Plot")):
with doc.create(LatexFigure(position="h!")) as pic:
pic.add_image(str(this_version_plot_path))
pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot")
with doc.create(Subsection("PrimAITE All Versions Learning Benchmark Plot")):
with doc.create(Subsection(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")):
with doc.create(LatexFigure(position="h!")) as pic:
pic.add_image(str(all_version_plot_path))
pic.add_caption("PrimAITE All Versions Learning Benchmark Plot")
pic.add_caption(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")
doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

View File

Before

Width:  |  Height:  |  Size: 225 KiB

After

Width:  |  Height:  |  Size: 225 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 411 KiB

View File

@@ -1,122 +0,0 @@
\documentclass{article}%
\usepackage[T1]{fontenc}%
\usepackage[utf8]{inputenc}%
\usepackage{lmodern}%
\usepackage{textcomp}%
\usepackage{lastpage}%
\usepackage{geometry}%
\geometry{tmargin=2.5cm,rmargin=2.5cm,bmargin=2.5cm,lmargin=2.5cm}%
\usepackage{graphicx}%
%
\title{PrimAITE 3.0.0 Learning Benchmark}%
\author{PrimAITE Dev Team}%
\date{2024{-}06{-}01}%
%
\begin{document}%
\normalsize%
\maketitle%
\section{Introduction}%
\label{sec:Introduction}%
PrimAITE v3.0.0 was benchmarked automatically upon release. Learning rate metrics were captured to be referenced during system{-}level testing and user acceptance testing (UAT).%
\newline%
The benchmarking process consists of running 10 training session using the same config file. Each session trains an agent for 1000 episodes, with each episode consisting of 128 steps.%
\newline%
The mean reward per episode from each session is captured. This is then used to calculate a combined average reward per episode from the 10 individual sessions for smoothing. Finally, a 25{-}widow rolling average of the combined average reward per session is calculated for further smoothing.
%
\section{System Information}%
\label{sec:SystemInformation}%
\subsection{Python}%
\label{subsec:Python}%
\begin{tabular}{|l|l|}%
\hline%
\textbf{Version}&3.8.10 (tags/v3.8.10:3d8993a, May 3 2021, 11:48:03) {[}MSC v.1928 64 bit (AMD64){]}\\%
\hline%
\end{tabular}
%
\subsection{System}%
\label{subsec:System}%
\begin{tabular}{|l|l|}%
\hline%
\textbf{OS}&Windows\\%
\hline%
\textbf{OS Version}&10.0.19045\\%
\hline%
\textbf{Machine}&AMD64\\%
\hline%
\textbf{Processor}&Intel64 Family 6 Model 85 Stepping 4, GenuineIntel\\%
\hline%
\end{tabular}
%
\subsection{CPU}%
\label{subsec:CPU}%
\begin{tabular}{|l|l|}%
\hline%
\textbf{Physical Cores}&6\\%
\hline%
\textbf{Total Cores}&12\\%
\hline%
\textbf{Max Frequency}&3600.00Mhz\\%
\hline%
\end{tabular}
%
\subsection{Memory}%
\label{subsec:Memory}%
\begin{tabular}{|l|l|}%
\hline%
\textbf{Total}&63.52GB\\%
\hline%
\textbf{Swap Total}&9.50GB\\%
\hline%
\end{tabular}
%
\section{Stats}%
\label{sec:Stats}%
\subsection{Benchmark Results}%
\label{subsec:BenchmarkResults}%
\begin{tabular}{|l|l|}%
\hline%
\textbf{Total Sessions}&10\\%
\hline%
\textbf{Total Episodes}&10010\\%
\hline%
\textbf{Total Steps}&1280000\\%
\hline%
\textbf{Av Session Duration (s)}&1569.8775\\%
\hline%
\textbf{Av Step Duration (s)}&0.0012\\%
\hline%
\textbf{Av Duration per 100 Steps per 10 Nodes (s)}&0.1226\\%
\hline%
\end{tabular}
%
\section{Graphs}%
\label{sec:Graphs}%
\subsection{PrimAITE 3.0.0 Learning Benchmark Plot}%
\label{subsec:PrimAITE3.0.0LearningBenchmarkPlot}%
\begin{figure}[h!]%
\centering%
\includegraphics[width=0.8\textwidth]{D:/Projects/ARCD/PrimAITE/PrimAITE/benchmark/results/v3.0.0/PrimAITE v3.0.0 Learning Benchmark.png}%
\caption{PrimAITE 3.0.0 Learning Benchmark Plot}%
\end{figure}
%
\subsection{PrimAITE All Versions Learning Benchmark Plot}%
\label{subsec:PrimAITEAllVersionsLearningBenchmarkPlot}%
\begin{figure}[h!]%
\centering%
\includegraphics[width=0.8\textwidth]{D:/Projects/ARCD/PrimAITE/PrimAITE/benchmark/results/PrimAITE Versions Learning Benchmark.png}%
\caption{PrimAITE All Versions Learning Benchmark Plot}%
\end{figure}
%
\end{document}

File diff suppressed because it is too large Load Diff

View File

@@ -1,122 +0,0 @@
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
from gymnasium.core import ObsType
from primaite.session.environment import PrimaiteGymEnv
class BenchmarkPrimaiteGymEnv(PrimaiteGymEnv):
"""
Class that extends the PrimaiteGymEnv.
The reset method is extended so that the average rewards per episode are recorded.
"""
total_time_steps: int = 0
def reset(self, seed: Optional[int] = None) -> Tuple[ObsType, Dict[str, Any]]:
"""Overrides the PrimAITEGymEnv reset so that the total timesteps is saved."""
self.total_time_steps += self.game.step_counter
return super().reset(seed=seed)
#####################################
# IGNORE BELOW FOR NOW
#####################################
class BenchMarkOSInfo:
"""Operating System Information about the machine that run the benchmark."""
operating_system: str
"""The operating system the benchmark was run on."""
operating_system_version: str
"""The operating system version the benchmark was run on."""
machine: str
"""The type of machine running the benchmark."""
processor: str
"""The processor used to run the benchmark."""
class BenchMarkCPUInfo:
"""CPU Information of the machine that ran the benchmark."""
physical_cores: int
"""The number of CPU cores the machine that ran the benchmark had."""
total_cores: int
"""The number of total cores the machine that run the benchmark had."""
max_frequency: int
"""The CPU's maximum clock speed."""
class BenchMarkMemoryInfo:
"""The Memory Information of the machine that ran the benchmark."""
total: str
"""The total amount of memory."""
swap_total: str
"""Virtual memory."""
class BenchMarkGPUInfo:
"""The GPU Information of the machine that ran the benchmark."""
name: str
"""GPU name."""
total_memory: str
"""GPU memory."""
class BenchMarkSystemInfo:
"""Overall system information of the machine that ran the benchmark."""
system: BenchMarkOSInfo
cpu: BenchMarkCPUInfo
memory: BenchMarkMemoryInfo
gpu: List[BenchMarkMemoryInfo]
class BenchMarkResult:
"""Class containing the relevant benchmark results."""
benchmark_start_time: datetime
"""Start time of the benchmark run."""
benchmark_end_time: datetime
"""End time of the benchmark run."""
primaite_version: str
"""The version of PrimAITE being benchmarked."""
system_info: BenchMarkSystemInfo
"""System information of the machine that ran the benchmark."""
total_sessions: int
"""The number of sessions that the benchmark ran."""
total_episodes: int
"""The number of episodes over all the sessions that the benchmark ran."""
total_timesteps: int
"""The number of timesteps over all the sessions that the benchmark ran."""
average_seconds_per_session: float
"""The average time per session."""
average_seconds_per_step: float
"""The average time per step."""
average_seconds_per_100_steps_and_10_nodes: float
"""The average time per 100 steps on a 10 node network."""
combined_average_reward_per_episode: Dict
"""tbd."""