Refactored the benchmarking process
This commit is contained in:
21
benchmark/benchmark.py
Normal file
21
benchmark/benchmark.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
from gymnasium.core import ObsType
|
||||
|
||||
from primaite.session.environment import PrimaiteGymEnv
|
||||
|
||||
|
||||
class BenchmarkPrimaiteGymEnv(PrimaiteGymEnv):
    """
    PrimaiteGymEnv subclass used by the benchmark.

    Every call to ``reset`` first adds the game's current ``step_counter`` to a running total, which is
    kept in ``total_time_steps``.
    """

    # Running total of step counts; grows each time ``reset`` is called.
    total_time_steps: int = 0

    def reset(self, seed: Optional[int] = None) -> Tuple[ObsType, Dict[str, Any]]:
        """Add the game's step counter to ``total_time_steps``, then delegate to the parent reset."""
        steps_so_far = self.game.step_counter
        self.total_time_steps = self.total_time_steps + steps_so_far
        return super().reset(seed=seed)
|
||||
@@ -1,27 +1,27 @@
|
||||
# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
|
||||
import json
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Final, Tuple
|
||||
|
||||
from report import build_benchmark_latex_report
|
||||
from stable_baselines3 import PPO
|
||||
|
||||
import primaite
|
||||
from benchmark.utils.benchmark import BenchmarkPrimaiteGymEnv
|
||||
from benchmark.utils.report import build_benchmark_latex_report
|
||||
from benchmark import BenchmarkPrimaiteGymEnv
|
||||
from primaite.config.load import data_manipulation_config_path
|
||||
|
||||
_LOGGER = primaite.getLogger(__name__)
|
||||
|
||||
_BENCHMARK_ROOT = Path(__file__).parent
|
||||
_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results"
|
||||
_RESULTS_ROOT.mkdir(exist_ok=True, parents=True)
|
||||
_MAJOR_V = primaite.__version__.split(".")[0]
|
||||
|
||||
_OUTPUT_ROOT: Final[Path] = _BENCHMARK_ROOT / "output"
|
||||
# Clear and recreate the output directory
|
||||
if _OUTPUT_ROOT.exists():
|
||||
shutil.rmtree(_OUTPUT_ROOT)
|
||||
_OUTPUT_ROOT.mkdir()
|
||||
_BENCHMARK_ROOT = Path(__file__).parent
|
||||
_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results" / f"v{_MAJOR_V}"
|
||||
_VERSION_ROOT: Final[Path] = _RESULTS_ROOT / f"v{primaite.__version__}"
|
||||
_SESSION_METADATA_ROOT: Final[Path] = _VERSION_ROOT / "session_metadata"
|
||||
|
||||
_SESSION_METADATA_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
class BenchmarkSession:
|
||||
@@ -33,7 +33,10 @@ class BenchmarkSession:
|
||||
num_episodes: int
|
||||
"""Number of episodes to run the training session."""
|
||||
|
||||
num_steps: int
|
||||
episode_len: int
|
||||
"""The number of steps per episode."""
|
||||
|
||||
total_steps: int
|
||||
"""Number of steps to run the training session."""
|
||||
|
||||
batch_size: int
|
||||
@@ -48,16 +51,21 @@ class BenchmarkSession:
|
||||
end_time: datetime
|
||||
"""End time for the session."""
|
||||
|
||||
session_metadata: Dict
|
||||
"""Dict containing the metadata for the session - used to generate benchmark report."""
|
||||
|
||||
def __init__(
|
||||
self, gym_env: BenchmarkPrimaiteGymEnv, num_episodes: int, num_steps: int, batch_size: int, learning_rate: float
|
||||
self,
|
||||
gym_env: BenchmarkPrimaiteGymEnv,
|
||||
episode_len: int,
|
||||
num_episodes: int,
|
||||
n_steps: int,
|
||||
batch_size: int,
|
||||
learning_rate: float,
|
||||
):
|
||||
"""Initialise the BenchmarkSession."""
|
||||
self.gym_env = gym_env
|
||||
self.episode_len = episode_len
|
||||
self.n_steps = n_steps
|
||||
self.num_episodes = num_episodes
|
||||
self.num_steps = num_steps
|
||||
self.total_steps = self.num_episodes * self.episode_len
|
||||
self.batch_size = batch_size
|
||||
self.learning_rate = learning_rate
|
||||
|
||||
@@ -65,15 +73,16 @@ class BenchmarkSession:
|
||||
"""Run the training session."""
|
||||
# start timer for session
|
||||
self.start_time = datetime.now()
|
||||
|
||||
model = PPO(
|
||||
policy="MlpPolicy",
|
||||
env=self.gym_env,
|
||||
learning_rate=self.learning_rate,
|
||||
n_steps=self.num_steps * self.num_episodes,
|
||||
batch_size=self.num_steps * self.num_episodes,
|
||||
n_steps=self.n_steps,
|
||||
batch_size=self.batch_size,
|
||||
verbose=0,
|
||||
tensorboard_log="./PPO_UC2/",
|
||||
)
|
||||
model.learn(total_timesteps=self.num_episodes * self.num_steps)
|
||||
model.learn(total_timesteps=self.total_steps)
|
||||
|
||||
# end timer for session
|
||||
self.end_time = datetime.now()
|
||||
@@ -142,12 +151,13 @@ def _prepare_session_directory():
|
||||
|
||||
|
||||
def run(
|
||||
number_of_sessions: int = 10,
|
||||
num_episodes: int = 1000,
|
||||
num_timesteps: int = 128,
|
||||
batch_size: int = 1280,
|
||||
number_of_sessions: int = 2,
|
||||
num_episodes: int = 5,
|
||||
episode_len: int = 128,
|
||||
n_steps: int = 1280,
|
||||
batch_size: int = 32,
|
||||
learning_rate: float = 3e-4,
|
||||
) -> None: # 10 # 1000 # 256
|
||||
) -> None:
|
||||
"""Run the PrimAITE benchmark."""
|
||||
benchmark_start_time = datetime.now()
|
||||
|
||||
@@ -163,13 +173,20 @@ def run(
|
||||
session = BenchmarkSession(
|
||||
gym_env=gym_env,
|
||||
num_episodes=num_episodes,
|
||||
num_steps=num_timesteps,
|
||||
n_steps=n_steps,
|
||||
episode_len=episode_len,
|
||||
batch_size=batch_size,
|
||||
learning_rate=learning_rate,
|
||||
)
|
||||
session.train()
|
||||
session_metadata_dict[i] = session.session_metadata
|
||||
|
||||
# Dump the session metadata so that we're not holding it in memory as it's large
|
||||
with open(_SESSION_METADATA_ROOT / f"{i}.json", "w") as file:
|
||||
json.dump(session.session_metadata, file, indent=4)
|
||||
|
||||
for i in range(1, number_of_sessions + 1):
|
||||
with open(_SESSION_METADATA_ROOT / f"{i}.json", "r") as file:
|
||||
session_metadata_dict[i] = json.load(file)
|
||||
# generate report
|
||||
build_benchmark_latex_report(
|
||||
benchmark_start_time=benchmark_start_time,
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
@@ -14,9 +13,9 @@ from pylatex import Command, Document
|
||||
from pylatex import Figure as LatexFigure
|
||||
from pylatex import Section, Subsection, Tabular
|
||||
from pylatex.utils import bold
|
||||
from utils import _get_system_info
|
||||
|
||||
import primaite
|
||||
from benchmark.utils.utils import _get_system_info
|
||||
|
||||
PLOT_CONFIG = {
|
||||
"size": {"auto_size": False, "width": 1500, "height": 900},
|
||||
@@ -124,7 +123,7 @@ def _plot_benchmark_metadata(
|
||||
"title": "Episode",
|
||||
"type": "linear",
|
||||
},
|
||||
yaxis={"title": "Average Reward"},
|
||||
yaxis={"title": "Total Reward"},
|
||||
title=title,
|
||||
)
|
||||
|
||||
@@ -140,7 +139,8 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
|
||||
benchmarked. The combined_av_reward_per_episode is extracted from each,
|
||||
converted into a polars dataframe, and plotted as a scatter line in plotly.
|
||||
"""
|
||||
title = "PrimAITE Versions Learning Benchmark"
|
||||
major_v = primaite.__version__.split(".")[0]
|
||||
title = f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*"
|
||||
subtitle = "Rolling Av (Combined Session Av)"
|
||||
if title:
|
||||
if subtitle:
|
||||
@@ -172,7 +172,7 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
|
||||
"title": "Episode",
|
||||
"type": "linear",
|
||||
},
|
||||
yaxis={"title": "Average Reward"},
|
||||
yaxis={"title": "Total Reward"},
|
||||
title=title,
|
||||
)
|
||||
fig["data"][0]["showlegend"] = True
|
||||
@@ -188,8 +188,6 @@ def build_benchmark_latex_report(
|
||||
v_str = f"v{primaite.__version__}"
|
||||
|
||||
version_result_dir = results_root_path / v_str
|
||||
if version_result_dir.exists():
|
||||
shutil.rmtree(version_result_dir)
|
||||
version_result_dir.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# load the config file as dict
|
||||
@@ -200,7 +198,7 @@ def build_benchmark_latex_report(
|
||||
benchmark_metadata_dict = _build_benchmark_results_dict(
|
||||
start_datetime=benchmark_start_time, metadata_dict=session_metadata, config=cfg_data
|
||||
)
|
||||
|
||||
major_v = primaite.__version__.split(".")[0]
|
||||
with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file:
|
||||
json.dump(benchmark_metadata_dict, file, indent=4)
|
||||
title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark"
|
||||
@@ -241,9 +239,9 @@ def build_benchmark_latex_report(
|
||||
f"with each episode consisting of {steps} steps."
|
||||
)
|
||||
doc.append(
|
||||
f"\nThe mean reward per episode from each session is captured. This is then used to calculate a "
|
||||
f"combined average reward per episode from the {sessions} individual sessions for smoothing. "
|
||||
f"Finally, a 25-widow rolling average of the combined average reward per session is calculated for "
|
||||
f"\nThe total reward per episode from each session is captured. This is then used to calculate an "
|
||||
f"caverage total reward per episode from the {sessions} individual sessions for smoothing. "
|
||||
f"Finally, a 25-widow rolling average of the average total reward per session is calculated for "
|
||||
f"further smoothing."
|
||||
)
|
||||
|
||||
@@ -294,14 +292,14 @@ def build_benchmark_latex_report(
|
||||
table.add_hline()
|
||||
|
||||
with doc.create(Section("Graphs")):
|
||||
with doc.create(Subsection(f"PrimAITE {primaite_version} Learning Benchmark Plot")):
|
||||
with doc.create(Subsection(f"v{primaite_version} Learning Benchmark Plot")):
|
||||
with doc.create(LatexFigure(position="h!")) as pic:
|
||||
pic.add_image(str(this_version_plot_path))
|
||||
pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot")
|
||||
|
||||
with doc.create(Subsection("PrimAITE All Versions Learning Benchmark Plot")):
|
||||
with doc.create(Subsection(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")):
|
||||
with doc.create(LatexFigure(position="h!")) as pic:
|
||||
pic.add_image(str(all_version_plot_path))
|
||||
pic.add_caption("PrimAITE All Versions Learning Benchmark Plot")
|
||||
pic.add_caption(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")
|
||||
|
||||
doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True)
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 90 KiB |
BIN
benchmark/results/v2/PrimAITE Versions Learning Benchmark.png
Normal file
BIN
benchmark/results/v2/PrimAITE Versions Learning Benchmark.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 79 KiB |
|
Before Width: | Height: | Size: 225 KiB After Width: | Height: | Size: 225 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 411 KiB |
@@ -1,122 +0,0 @@
|
||||
\documentclass{article}%
|
||||
\usepackage[T1]{fontenc}%
|
||||
\usepackage[utf8]{inputenc}%
|
||||
\usepackage{lmodern}%
|
||||
\usepackage{textcomp}%
|
||||
\usepackage{lastpage}%
|
||||
\usepackage{geometry}%
|
||||
\geometry{tmargin=2.5cm,rmargin=2.5cm,bmargin=2.5cm,lmargin=2.5cm}%
|
||||
\usepackage{graphicx}%
|
||||
%
|
||||
\title{PrimAITE 3.0.0 Learning Benchmark}%
|
||||
\author{PrimAITE Dev Team}%
|
||||
\date{2024{-}06{-}01}%
|
||||
%
|
||||
\begin{document}%
|
||||
\normalsize%
|
||||
\maketitle%
|
||||
\section{Introduction}%
|
||||
\label{sec:Introduction}%
|
||||
PrimAITE v3.0.0 was benchmarked automatically upon release. Learning rate metrics were captured to be referenced during system{-}level testing and user acceptance testing (UAT).%
|
||||
\newline%
|
||||
The benchmarking process consists of running 10 training sessions using the same config file. Each session trains an agent for 1000 episodes, with each episode consisting of 128 steps.%
|
||||
\newline%
|
||||
The mean reward per episode from each session is captured. This is then used to calculate a combined average reward per episode from the 10 individual sessions for smoothing. Finally, a 25{-}window rolling average of the combined average reward per session is calculated for further smoothing.
|
||||
|
||||
%
|
||||
\section{System Information}%
|
||||
\label{sec:SystemInformation}%
|
||||
\subsection{Python}%
|
||||
\label{subsec:Python}%
|
||||
\begin{tabular}{|l|l|}%
|
||||
\hline%
|
||||
\textbf{Version}&3.8.10 (tags/v3.8.10:3d8993a, May 3 2021, 11:48:03) {[}MSC v.1928 64 bit (AMD64){]}\\%
|
||||
\hline%
|
||||
\end{tabular}
|
||||
|
||||
%
|
||||
\subsection{System}%
|
||||
\label{subsec:System}%
|
||||
\begin{tabular}{|l|l|}%
|
||||
\hline%
|
||||
\textbf{OS}&Windows\\%
|
||||
\hline%
|
||||
\textbf{OS Version}&10.0.19045\\%
|
||||
\hline%
|
||||
\textbf{Machine}&AMD64\\%
|
||||
\hline%
|
||||
\textbf{Processor}&Intel64 Family 6 Model 85 Stepping 4, GenuineIntel\\%
|
||||
\hline%
|
||||
\end{tabular}
|
||||
|
||||
%
|
||||
\subsection{CPU}%
|
||||
\label{subsec:CPU}%
|
||||
\begin{tabular}{|l|l|}%
|
||||
\hline%
|
||||
\textbf{Physical Cores}&6\\%
|
||||
\hline%
|
||||
\textbf{Total Cores}&12\\%
|
||||
\hline%
|
||||
\textbf{Max Frequency}&3600.00Mhz\\%
|
||||
\hline%
|
||||
\end{tabular}
|
||||
|
||||
%
|
||||
\subsection{Memory}%
|
||||
\label{subsec:Memory}%
|
||||
\begin{tabular}{|l|l|}%
|
||||
\hline%
|
||||
\textbf{Total}&63.52GB\\%
|
||||
\hline%
|
||||
\textbf{Swap Total}&9.50GB\\%
|
||||
\hline%
|
||||
\end{tabular}
|
||||
|
||||
%
|
||||
\section{Stats}%
|
||||
\label{sec:Stats}%
|
||||
\subsection{Benchmark Results}%
|
||||
\label{subsec:BenchmarkResults}%
|
||||
\begin{tabular}{|l|l|}%
|
||||
\hline%
|
||||
\textbf{Total Sessions}&10\\%
|
||||
\hline%
|
||||
\textbf{Total Episodes}&10010\\%
|
||||
\hline%
|
||||
\textbf{Total Steps}&1280000\\%
|
||||
\hline%
|
||||
\textbf{Av Session Duration (s)}&1569.8775\\%
|
||||
\hline%
|
||||
\textbf{Av Step Duration (s)}&0.0012\\%
|
||||
\hline%
|
||||
\textbf{Av Duration per 100 Steps per 10 Nodes (s)}&0.1226\\%
|
||||
\hline%
|
||||
\end{tabular}
|
||||
|
||||
%
|
||||
\section{Graphs}%
|
||||
\label{sec:Graphs}%
|
||||
\subsection{PrimAITE 3.0.0 Learning Benchmark Plot}%
|
||||
\label{subsec:PrimAITE3.0.0LearningBenchmarkPlot}%
|
||||
|
||||
|
||||
\begin{figure}[h!]%
|
||||
\centering%
|
||||
\includegraphics[width=0.8\textwidth]{D:/Projects/ARCD/PrimAITE/PrimAITE/benchmark/results/v3.0.0/PrimAITE v3.0.0 Learning Benchmark.png}%
|
||||
\caption{PrimAITE 3.0.0 Learning Benchmark Plot}%
|
||||
\end{figure}
|
||||
|
||||
%
|
||||
\subsection{PrimAITE All Versions Learning Benchmark Plot}%
|
||||
\label{subsec:PrimAITEAllVersionsLearningBenchmarkPlot}%
|
||||
|
||||
|
||||
\begin{figure}[h!]%
|
||||
\centering%
|
||||
\includegraphics[width=0.8\textwidth]{D:/Projects/ARCD/PrimAITE/PrimAITE/benchmark/results/PrimAITE Versions Learning Benchmark.png}%
|
||||
\caption{PrimAITE All Versions Learning Benchmark Plot}%
|
||||
\end{figure}
|
||||
|
||||
%
|
||||
\end{document}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,122 +0,0 @@
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from gymnasium.core import ObsType
|
||||
|
||||
from primaite.session.environment import PrimaiteGymEnv
|
||||
|
||||
|
||||
class BenchmarkPrimaiteGymEnv(PrimaiteGymEnv):
    """
    PrimaiteGymEnv subclass used by the benchmark.

    Every call to ``reset`` first adds the game's current ``step_counter`` to a running total, which is
    kept in ``total_time_steps``.
    """

    # Running total of step counts; grows each time ``reset`` is called.
    total_time_steps: int = 0

    def reset(self, seed: Optional[int] = None) -> Tuple[ObsType, Dict[str, Any]]:
        """Add the game's step counter to ``total_time_steps``, then delegate to the parent reset."""
        counted_steps = self.game.step_counter
        self.total_time_steps = self.total_time_steps + counted_steps
        return super().reset(seed=seed)
|
||||
|
||||
|
||||
#####################################
|
||||
# IGNORE BELOW FOR NOW
|
||||
#####################################
|
||||
|
||||
|
||||
class BenchMarkOSInfo:
    """Operating system information about the machine that ran the benchmark."""

    #: Operating system the benchmark was run on.
    operating_system: str

    #: Version of the operating system the benchmark was run on.
    operating_system_version: str

    #: Type of machine that ran the benchmark.
    machine: str

    #: Processor used to run the benchmark.
    processor: str
|
||||
|
||||
|
||||
class BenchMarkCPUInfo:
    """CPU information for the machine that ran the benchmark."""

    #: Number of physical CPU cores on the benchmark machine.
    physical_cores: int

    #: Total number of cores on the benchmark machine.
    total_cores: int

    #: Maximum clock speed of the CPU.
    max_frequency: int
|
||||
|
||||
|
||||
class BenchMarkMemoryInfo:
    """Memory information for the machine that ran the benchmark."""

    #: Total amount of memory.
    total: str

    #: Virtual (swap) memory.
    swap_total: str
|
||||
|
||||
|
||||
class BenchMarkGPUInfo:
    """GPU information for the machine that ran the benchmark."""

    #: Name of the GPU.
    name: str

    #: Memory of the GPU.
    total_memory: str
|
||||
|
||||
|
||||
class BenchMarkSystemInfo:
    """Overall system information of the machine that ran the benchmark."""

    #: Operating system details.
    system: BenchMarkOSInfo

    #: CPU details.
    cpu: BenchMarkCPUInfo

    #: Memory details.
    memory: BenchMarkMemoryInfo

    #: GPU details. This was annotated as a list of BenchMarkMemoryInfo, which looks like a
    #: copy-paste slip; BenchMarkGPUInfo is the GPU record type.
    #: NOTE(review): presumably one entry per GPU in the machine - confirm.
    gpu: List[BenchMarkGPUInfo]
|
||||
|
||||
|
||||
class BenchMarkResult:
    """Container for the relevant results of a benchmark run."""

    #: Start time of the benchmark run.
    benchmark_start_time: datetime

    #: End time of the benchmark run.
    benchmark_end_time: datetime

    #: Version of PrimAITE being benchmarked.
    primaite_version: str

    #: System information of the machine that ran the benchmark.
    system_info: BenchMarkSystemInfo

    #: Number of sessions that the benchmark ran.
    total_sessions: int

    #: Number of episodes over all the sessions that the benchmark ran.
    total_episodes: int

    #: Number of timesteps over all the sessions that the benchmark ran.
    total_timesteps: int

    #: Average time per session.
    average_seconds_per_session: float

    #: Average time per step.
    average_seconds_per_step: float

    #: Average time per 100 steps on a 10 node network.
    average_seconds_per_100_steps_and_10_nodes: float

    #: Left as "tbd" in the original; contents are not documented here - TODO confirm.
    combined_average_reward_per_episode: Dict
|
||||
Reference in New Issue
Block a user