retrospectively benchmarked 3.0.0 using the new benchmark process
This commit is contained in:
108
.azure/azure-benchmark-pipeline.yaml
Normal file
108
.azure/azure-benchmark-pipeline.yaml
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
# PrimAITE benchmark pipeline.
#
# Runs the benchmark script on every push to a release/* branch and on a weekday schedule for dev,
# publishes the results as a pipeline artifact and, on release/* branches only, commits the results
# back to the branch that triggered the run.
trigger:
  - release/*

schedules:
  - cron: "0 2 * * 1-5" # Run at 2 AM every weekday
    displayName: "Weekday Schedule"
    branches:
      include:
        - 'refs/heads/dev'

# Placeholders only; both values are overwritten at runtime by the 'Set Version Variables' step.
variables:
  VERSION: ''
  MAJOR_VERSION: ''

jobs:
  - job: PrimAITE_Benchmark
    timeoutInMinutes: 360 # 6-hour maximum
    pool:
      name: 'Imaginary Yak Pool'
    workspace:
      clean: all

    steps:
      # persistCredentials keeps the checkout credentials in the git config so the final step can push.
      - checkout: self
        persistCredentials: true

      - script: |
          python3.10 -m venv venv
        displayName: 'Create venv'

      # On dev the VERSION file is suffixed with a date-stamped local label so that each scheduled
      # run writes to its own results folder. Release branches keep the VERSION file untouched.
      - script: |
          set -e
          VERSION=$(cat src/primaite/VERSION | tr -d '\n')
          if [[ "$(Build.SourceBranch)" == "refs/heads/dev" ]]; then
            DATE=$(date +%Y%m%d)
            echo "${VERSION}+dev.${DATE}" > src/primaite/VERSION
          fi
        displayName: 'Update VERSION file for Dev Benchmark'

      # Export the (possibly updated) version and its major component as pipeline variables so that
      # later steps can reference them with $(VERSION) / $(MAJOR_VERSION).
      - script: |
          set -e
          VERSION=$(cat src/primaite/VERSION | tr -d '\n')
          MAJOR_VERSION=$(echo $VERSION | cut -d. -f1)
          echo "##vso[task.setvariable variable=VERSION]$VERSION"
          echo "##vso[task.setvariable variable=MAJOR_VERSION]$MAJOR_VERSION"
        displayName: 'Set Version Variables'

      # set -e: stop at the first failing command so a broken install is not masked by a later one.
      - script: |
          set -e
          source venv/bin/activate
          pip install --upgrade pip
          pip install -e .[dev,rl]
          primaite setup
        displayName: 'Install Dependencies'

      - script: |
          set -e
          source venv/bin/activate
          cd benchmark
          python primaite_benchmark.py
          cd ..
        displayName: 'Run Benchmarking Script'

      - script: |
          tar czf primaite_v$(VERSION)_benchmark.tar.gz benchmark/results/v$(MAJOR_VERSION)/v$(VERSION)
        displayName: 'Prepare Artifacts for Publishing'

      - task: PublishPipelineArtifact@1
        inputs:
          targetPath: primaite_v$(VERSION)_benchmark.tar.gz
          artifactName: 'benchmark-zip-output'
          publishLocation: 'pipeline'
        displayName: 'Publish Benchmark Output zip as Artifact'

      - script: |
          git config --global user.email "oss@dstl.gov.uk"
          git config --global user.name "Defence Science and Technology Laboratory UK"
        workingDirectory: $(System.DefaultWorkingDirectory)
        displayName: 'Configure Git'
        condition: and(succeeded(), startsWith(variables['Build.SourceBranch'], 'refs/heads/release'))

      # Commits the benchmark output back to the release branch that triggered the run.
      # set -e: a failed fetch/checkout/stash-pop must abort the step rather than fall through to
      # commit and push.
      - script: |
          set -e

          echo "Fetching all branches..."
          git fetch --all --prune

          echo "Stashing files..."
          git stash push -u

          echo "Resolving branch name..."
          # Extracting just the branch name from the full ref path
          branch_name=$(echo "$(Build.SourceBranch)" | sed 's|refs/heads/||')
          echo "Branch Name: $branch_name"

          echo "Checking out branch $branch_name..."
          git checkout "$branch_name"

          echo "Popping stash..."
          git stash pop

          echo "Adding benchmark results..."
          git add benchmark/results/v$(MAJOR_VERSION)/v$(VERSION)/*

          # Nothing staged means the results are already on the branch; 'git commit' would fail.
          if git diff --cached --quiet; then
            echo "No benchmark changes to commit."
            exit 0
          fi

          echo "Committing changes..."
          git commit -m "Automated benchmark output commit for version $(VERSION) [skip ci]"

          echo "Pushing to remote..."
          git push origin "$branch_name"
        displayName: 'Commit and Push Benchmark Results'
        workingDirectory: $(System.DefaultWorkingDirectory)
        env:
          # NOTE(review): no command in this script reads GIT_CREDENTIALS; the push presumably relies
          # on persistCredentials from the checkout step. Confirm before removing.
          GIT_CREDENTIALS: $(System.AccessToken)
        condition: and(succeeded(), startsWith(variables['Build.SourceBranch'], 'refs/heads/release'))
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
|
# © Crown-owned copyright 2024, Defence Science and Technology Laboratory UK
|
||||||
from typing import Any, Dict, Optional, Tuple
|
from typing import Any, Dict, Optional, Tuple
|
||||||
|
|
||||||
from gymnasium.core import ObsType
|
from gymnasium.core import ObsType
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
|
# © Crown-owned copyright 2024, Defence Science and Technology Laboratory UK
|
||||||
import json
|
import json
|
||||||
import shutil
|
import shutil
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Final, Tuple
|
from typing import Any, Dict, Final, Tuple
|
||||||
|
|
||||||
from report import build_benchmark_latex_report
|
from report import build_benchmark_md_report
|
||||||
from stable_baselines3 import PPO
|
from stable_baselines3 import PPO
|
||||||
|
|
||||||
import primaite
|
import primaite
|
||||||
@@ -117,14 +117,14 @@ class BenchmarkSession:
|
|||||||
def generate_learn_metadata_dict(self) -> Dict[str, Any]:
|
def generate_learn_metadata_dict(self) -> Dict[str, Any]:
|
||||||
"""Metadata specific to the learning session."""
|
"""Metadata specific to the learning session."""
|
||||||
total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations()
|
total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations()
|
||||||
self.gym_env.average_reward_per_episode.pop(0) # remove episode 0
|
self.gym_env.total_reward_per_episode.pop(0) # remove episode 0
|
||||||
return {
|
return {
|
||||||
"total_episodes": self.gym_env.episode_counter,
|
"total_episodes": self.gym_env.episode_counter,
|
||||||
"total_time_steps": self.gym_env.total_time_steps,
|
"total_time_steps": self.gym_env.total_time_steps,
|
||||||
"total_s": total_s,
|
"total_s": total_s,
|
||||||
"s_per_step": s_per_step,
|
"s_per_step": s_per_step,
|
||||||
"s_per_100_steps_10_nodes": s_per_100_steps_10_nodes,
|
"s_per_100_steps_10_nodes": s_per_100_steps_10_nodes,
|
||||||
"av_reward_per_episode": self.gym_env.average_reward_per_episode,
|
"total_reward_per_episode": self.gym_env.total_reward_per_episode,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -151,8 +151,8 @@ def _prepare_session_directory():
|
|||||||
|
|
||||||
|
|
||||||
def run(
|
def run(
|
||||||
number_of_sessions: int = 2,
|
number_of_sessions: int = 5,
|
||||||
num_episodes: int = 5,
|
num_episodes: int = 1000,
|
||||||
episode_len: int = 128,
|
episode_len: int = 128,
|
||||||
n_steps: int = 1280,
|
n_steps: int = 1280,
|
||||||
batch_size: int = 32,
|
batch_size: int = 32,
|
||||||
@@ -188,7 +188,7 @@ def run(
|
|||||||
with open(_SESSION_METADATA_ROOT / f"{i}.json", "r") as file:
|
with open(_SESSION_METADATA_ROOT / f"{i}.json", "r") as file:
|
||||||
session_metadata_dict[i] = json.load(file)
|
session_metadata_dict[i] = json.load(file)
|
||||||
# generate report
|
# generate report
|
||||||
build_benchmark_latex_report(
|
build_benchmark_md_report(
|
||||||
benchmark_start_time=benchmark_start_time,
|
benchmark_start_time=benchmark_start_time,
|
||||||
session_metadata=session_metadata_dict,
|
session_metadata=session_metadata_dict,
|
||||||
config_path=data_manipulation_config_path(),
|
config_path=data_manipulation_config_path(),
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
|
# © Crown-owned copyright 2024, Defence Science and Technology Laboratory UK
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -9,10 +9,6 @@ import plotly.graph_objects as go
|
|||||||
import polars as pl
|
import polars as pl
|
||||||
import yaml
|
import yaml
|
||||||
from plotly.graph_objs import Figure
|
from plotly.graph_objs import Figure
|
||||||
from pylatex import Command, Document
|
|
||||||
from pylatex import Figure as LatexFigure
|
|
||||||
from pylatex import Section, Subsection, Tabular
|
|
||||||
from pylatex.utils import bold
|
|
||||||
from utils import _get_system_info
|
from utils import _get_system_info
|
||||||
|
|
||||||
import primaite
|
import primaite
|
||||||
@@ -21,10 +17,20 @@ PLOT_CONFIG = {
|
|||||||
"size": {"auto_size": False, "width": 1500, "height": 900},
|
"size": {"auto_size": False, "width": 1500, "height": 900},
|
||||||
"template": "plotly_white",
|
"template": "plotly_white",
|
||||||
"range_slider": False,
|
"range_slider": False,
|
||||||
|
"av_s_per_100_steps_10_nodes_benchmark_threshold": 5,
|
||||||
|
"benchmark_line_color": "grey",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict, config: Dict) -> dict:
|
def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict, config: Dict) -> dict:
|
||||||
|
"""
|
||||||
|
Constructs a dictionary aggregating benchmark results from multiple sessions.
|
||||||
|
|
||||||
|
:param start_datetime: The datetime when the benchmarking started.
|
||||||
|
:param metadata_dict: Dictionary containing metadata for each session.
|
||||||
|
:param config: Configuration settings used during the benchmarking.
|
||||||
|
:return: A dictionary containing aggregated data and metadata from the benchmarking sessions.
|
||||||
|
"""
|
||||||
num_sessions = len(metadata_dict) # number of sessions
|
num_sessions = len(metadata_dict) # number of sessions
|
||||||
|
|
||||||
averaged_data = {
|
averaged_data = {
|
||||||
@@ -39,24 +45,30 @@ def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict,
|
|||||||
"av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / num_sessions,
|
"av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / num_sessions,
|
||||||
"av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values())
|
"av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values())
|
||||||
/ num_sessions,
|
/ num_sessions,
|
||||||
"combined_av_reward_per_episode": {},
|
"combined_total_reward_per_episode": {},
|
||||||
"session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()},
|
"session_total_reward_per_episode": {k: v["total_reward_per_episode"] for k, v in metadata_dict.items()},
|
||||||
"config": config,
|
"config": config,
|
||||||
}
|
}
|
||||||
|
|
||||||
# find the average of each episode across all sessions
|
# find the average of each episode across all sessions
|
||||||
episodes = metadata_dict[1]["av_reward_per_episode"].keys()
|
episodes = metadata_dict[1]["total_reward_per_episode"].keys()
|
||||||
|
|
||||||
for episode in episodes:
|
for episode in episodes:
|
||||||
combined_av_reward = (
|
combined_av_reward = (
|
||||||
sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / num_sessions
|
sum(metadata_dict[k]["total_reward_per_episode"][episode] for k in metadata_dict.keys()) / num_sessions
|
||||||
)
|
)
|
||||||
averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward
|
averaged_data["combined_total_reward_per_episode"][episode] = combined_av_reward
|
||||||
|
|
||||||
return averaged_data
|
return averaged_data
|
||||||
|
|
||||||
|
|
||||||
def _get_df_from_episode_av_reward_dict(data: Dict) -> pl.DataFrame:
|
def _get_df_from_episode_av_reward_dict(data: Dict) -> pl.DataFrame:
|
||||||
|
"""
|
||||||
|
Converts a dictionary of episode average rewards into a Polars DataFrame.
|
||||||
|
|
||||||
|
:param data: Dictionary with episodes as keys and average rewards as values.
|
||||||
|
:return: Polars DataFrame with episodes and average rewards, including a rolling average.
|
||||||
|
"""
|
||||||
data: Dict = {"episode": data.keys(), "av_reward": data.values()}
|
data: Dict = {"episode": data.keys(), "av_reward": data.values()}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -71,6 +83,14 @@ def _plot_benchmark_metadata(
|
|||||||
title: Optional[str] = None,
|
title: Optional[str] = None,
|
||||||
subtitle: Optional[str] = None,
|
subtitle: Optional[str] = None,
|
||||||
) -> Figure:
|
) -> Figure:
|
||||||
|
"""
|
||||||
|
Plots benchmark metadata as a line graph using Plotly.
|
||||||
|
|
||||||
|
:param benchmark_metadata_dict: Dictionary containing the total reward per episode and session.
|
||||||
|
:param title: Optional title for the graph.
|
||||||
|
:param subtitle: Optional subtitle for the graph.
|
||||||
|
:return: Plotly figure object representing the benchmark metadata plot.
|
||||||
|
"""
|
||||||
if title:
|
if title:
|
||||||
if subtitle:
|
if subtitle:
|
||||||
title = f"{title} <br>{subtitle}</sup>"
|
title = f"{title} <br>{subtitle}</sup>"
|
||||||
@@ -87,7 +107,7 @@ def _plot_benchmark_metadata(
|
|||||||
fig = go.Figure(layout=layout)
|
fig = go.Figure(layout=layout)
|
||||||
fig.update_layout(template=PLOT_CONFIG["template"])
|
fig.update_layout(template=PLOT_CONFIG["template"])
|
||||||
|
|
||||||
for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items():
|
for session, av_reward_dict in benchmark_metadata_dict["session_total_reward_per_episode"].items():
|
||||||
df = _get_df_from_episode_av_reward_dict(av_reward_dict)
|
df = _get_df_from_episode_av_reward_dict(av_reward_dict)
|
||||||
fig.add_trace(
|
fig.add_trace(
|
||||||
go.Scatter(
|
go.Scatter(
|
||||||
@@ -100,7 +120,7 @@ def _plot_benchmark_metadata(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"])
|
df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_total_reward_per_episode"])
|
||||||
fig.add_trace(
|
fig.add_trace(
|
||||||
go.Scatter(
|
go.Scatter(
|
||||||
x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"}
|
x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"}
|
||||||
@@ -136,11 +156,11 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
|
|||||||
|
|
||||||
Does this by iterating over the ``benchmark/results`` directory and
|
Does this by iterating over the ``benchmark/results`` directory and
|
||||||
extracting the benchmark metadata json for each version that has been
|
extracting the benchmark metadata json for each version that has been
|
||||||
benchmarked. The combined_av_reward_per_episode is extracted from each,
|
benchmarked. The combined_total_reward_per_episode is extracted from each,
|
||||||
converted into a polars dataframe, and plotted as a scatter line in plotly.
|
converted into a polars dataframe, and plotted as a scatter line in plotly.
|
||||||
"""
|
"""
|
||||||
major_v = primaite.__version__.split(".")[0]
|
major_v = primaite.__version__.split(".")[0]
|
||||||
title = f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*"
|
title = f"Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}"
|
||||||
subtitle = "Rolling Av (Combined Session Av)"
|
subtitle = "Rolling Av (Combined Session Av)"
|
||||||
if title:
|
if title:
|
||||||
if subtitle:
|
if subtitle:
|
||||||
@@ -162,7 +182,7 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
|
|||||||
metadata_file = dir / f"{dir.name}_benchmark_metadata.json"
|
metadata_file = dir / f"{dir.name}_benchmark_metadata.json"
|
||||||
with open(metadata_file, "r") as file:
|
with open(metadata_file, "r") as file:
|
||||||
metadata_dict = json.load(file)
|
metadata_dict = json.load(file)
|
||||||
df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"])
|
df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_total_reward_per_episode"])
|
||||||
|
|
||||||
fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name))
|
fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name))
|
||||||
|
|
||||||
@@ -180,10 +200,118 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
|
|||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def build_benchmark_latex_report(
|
def _get_performance_benchmark_for_all_version_dict(results_directory: Path) -> Dict[str, float]:
    """
    Gathers the performance benchmark of every version stored in a results directory.

    Each sub-directory of ``results_directory`` is expected to hold a ``<dir name>_benchmark_metadata.json`` file.
    The ``primaite_version`` and ``av_s_per_100_steps_10_nodes`` values are read from each of those files and
    aggregated into a dictionary. Plain files, and sub-directories that do not contain a metadata file, are skipped
    so that a stray or partially written folder does not abort report generation.

    :param results_directory: The directory containing one sub-directory per benchmarked version.
    :return: A dictionary with version numbers as keys and their corresponding average performance benchmark
        (average time per 100 steps on 10 nodes) as values.
    :raises KeyError: If a metadata file lacks ``primaite_version`` or ``av_s_per_100_steps_10_nodes``.
    """
    performance_benchmark_dict: Dict[str, float] = {}
    for version_dir in results_directory.iterdir():
        if not version_dir.is_dir():
            continue
        metadata_file = version_dir / f"{version_dir.name}_benchmark_metadata.json"
        if not metadata_file.is_file():
            continue
        with open(metadata_file, "r", encoding="utf-8") as file:
            metadata_dict = json.load(file)
        version = metadata_dict["primaite_version"]
        performance_benchmark_dict[version] = metadata_dict["av_s_per_100_steps_10_nodes"]
    return performance_benchmark_dict
|
||||||
|
|
||||||
|
|
||||||
|
def _version_sort_key(version: str) -> tuple:
    """
    Builds a sort key that orders version strings numerically rather than lexicographically.

    The leading digits of each dot-separated component are compared as integers, so ``"3.10.0"`` sorts after
    ``"3.2.0"``. Anything following a ``+`` is ignored for the numeric part. The raw string is appended as a
    tie-breaker so the ordering stays deterministic; this is not a full PEP 440 comparison.

    :param version: The version string, e.g. ``"3.0.0"`` or ``"3.0.0+dev.20240101"``.
    :return: A tuple of (list of numeric components, original string).
    """
    release = version.split("+", 1)[0]
    numeric_parts = []
    for part in release.split("."):
        digits = ""
        for char in part:
            if not "0" <= char <= "9":
                break
            digits += char
        numeric_parts.append(int(digits) if digits else 0)
    return numeric_parts, version


def _plot_av_s_per_100_steps_10_nodes(
    version_times_dict: Dict[str, float],
) -> Figure:
    """
    Creates a bar chart visualising the performance of each version of PrimAITE.

    Performance is based on the average training time per 100 steps on 10 nodes. The function also includes a benchmark
    line indicating the target maximum time.

    Versions that meet the target (time <= threshold, matching the subtitle) are marked in green, and those over it
    are marked in red. Bars are ordered by numeric version so that e.g. 3.10.0 is drawn after 3.2.0.

    :param version_times_dict: A dictionary with software versions as keys and average times as values. May be empty,
        in which case only the benchmark line is drawn.
    :return: A Plotly figure object representing the bar chart of the performance metrics.
    """
    major_v = primaite.__version__.split(".")[0]
    title = f"Performance of Minor and Bugfix Releases for Major Version {major_v}"
    subtitle = (
        f"Average Training Time per 100 Steps on 10 Nodes "
        f"(target: <= {PLOT_CONFIG['av_s_per_100_steps_10_nodes_benchmark_threshold']} seconds)"
    )
    title = f"{title} <br><sub>{subtitle}</sub>"

    layout = go.Layout(
        autosize=PLOT_CONFIG["size"]["auto_size"],
        width=PLOT_CONFIG["size"]["width"],
        height=PLOT_CONFIG["size"]["height"],
    )
    fig = go.Figure(layout=layout)
    fig.update_layout(template=PLOT_CONFIG["template"])

    versions = sorted(version_times_dict, key=_version_sort_key)
    times = [version_times_dict[version] for version in versions]
    av_s_per_100_steps_10_nodes_benchmark_threshold = PLOT_CONFIG["av_s_per_100_steps_10_nodes_benchmark_threshold"]
    benchmark_line_color = PLOT_CONFIG["benchmark_line_color"]

    # Calculate the appropriate maximum y-axis value; including the threshold in the candidates keeps the
    # benchmark line visible and avoids max() failing when there are no versions to plot
    max_y_axis_value = max([*times, av_s_per_100_steps_10_nodes_benchmark_threshold]) + 1

    fig.add_trace(
        go.Bar(
            x=versions,
            y=times,
            marker_color=[
                "green" if time <= av_s_per_100_steps_10_nodes_benchmark_threshold else "red" for time in times
            ],
            text=times,
            textposition="auto",
        )
    )

    # Add a horizontal line for the benchmark
    fig.add_shape(
        type="line",
        x0=-0.5,  # start slightly before the first bar
        x1=len(versions) - 0.5,  # end slightly after the last bar
        y0=av_s_per_100_steps_10_nodes_benchmark_threshold,
        y1=av_s_per_100_steps_10_nodes_benchmark_threshold,
        line=dict(
            color=benchmark_line_color,
            width=2,
            dash="dot",
        ),
    )

    fig.update_layout(
        xaxis_title="PrimAITE Version",
        yaxis_title="Avg Time per 100 Steps on 10 Nodes (seconds)",
        yaxis=dict(range=[0, max_y_axis_value]),
        title=title,
    )

    return fig
|
||||||
|
|
||||||
|
|
||||||
|
def build_benchmark_md_report(
|
||||||
benchmark_start_time: datetime, session_metadata: Dict, config_path: Path, results_root_path: Path
|
benchmark_start_time: datetime, session_metadata: Dict, config_path: Path, results_root_path: Path
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Generates a latex report of the benchmark run."""
|
"""
|
||||||
|
Generates a Markdown report for a benchmarking session, documenting performance metrics and graphs.
|
||||||
|
|
||||||
|
This function orchestrates the creation of several graphs depicting various performance benchmarks and aggregates
|
||||||
|
them into a markdown document that includes comprehensive system and benchmark information.
|
||||||
|
|
||||||
|
:param benchmark_start_time: The datetime object representing when the benchmarking process was initiated.
|
||||||
|
:param session_metadata: A dictionary containing metadata for each benchmarking session.
|
||||||
|
:param config_path: A pathlib.Path object pointing to the configuration file used for the benchmark sessions.
|
||||||
|
:param results_root_path: A pathlib.Path object pointing to the directory where the results and graphs should be
|
||||||
|
saved.
|
||||||
|
"""
|
||||||
# generate report folder
|
# generate report folder
|
||||||
v_str = f"v{primaite.__version__}"
|
v_str = f"v{primaite.__version__}"
|
||||||
|
|
||||||
@@ -208,98 +336,91 @@ def build_benchmark_latex_report(
|
|||||||
|
|
||||||
fig = _plot_all_benchmarks_combined_session_av(results_directory=results_root_path)
|
fig = _plot_all_benchmarks_combined_session_av(results_directory=results_root_path)
|
||||||
|
|
||||||
all_version_plot_path = results_root_path / "PrimAITE Versions Learning Benchmark.png"
|
filename = f"PrimAITE Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}.png"
|
||||||
|
|
||||||
|
all_version_plot_path = version_result_dir / filename
|
||||||
fig.write_image(all_version_plot_path)
|
fig.write_image(all_version_plot_path)
|
||||||
|
|
||||||
geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"}
|
performance_benchmark_dict = _get_performance_benchmark_for_all_version_dict(results_directory=results_root_path)
|
||||||
|
fig = _plot_av_s_per_100_steps_10_nodes(performance_benchmark_dict)
|
||||||
|
filename = f"PrimAITE Performance of Minor and Bugfix Releases for Major Version {major_v}.png"
|
||||||
|
performance_benchmark_plot_path = version_result_dir / filename
|
||||||
|
fig.write_image(performance_benchmark_plot_path)
|
||||||
|
|
||||||
data = benchmark_metadata_dict
|
data = benchmark_metadata_dict
|
||||||
primaite_version = data["primaite_version"]
|
primaite_version = data["primaite_version"]
|
||||||
|
|
||||||
# Create a new document
|
with open(version_result_dir / f"PrimAITE v{primaite_version} Benchmark Report.md", "w") as file:
|
||||||
doc = Document("report", geometry_options=geometry_options)
|
# Title
|
||||||
# Title
|
file.write(f"# PrimAITE v{primaite_version} Learning Benchmark\n")
|
||||||
doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark"))
|
file.write("## PrimAITE Dev Team\n")
|
||||||
doc.preamble.append(Command("author", "PrimAITE Dev Team"))
|
file.write(f"### {datetime.now().date()}\n")
|
||||||
doc.preamble.append(Command("date", datetime.now().date()))
|
file.write("\n---\n")
|
||||||
doc.append(Command("maketitle"))
|
|
||||||
|
|
||||||
sessions = data["total_sessions"]
|
sessions = data["total_sessions"]
|
||||||
episodes = session_metadata[1]["total_episodes"] - 1
|
episodes = session_metadata[1]["total_episodes"] - 1
|
||||||
steps = data["config"]["game"]["max_episode_length"]
|
steps = data["config"]["game"]["max_episode_length"]
|
||||||
|
|
||||||
# Body
|
# Body
|
||||||
with doc.create(Section("Introduction")):
|
file.write("## 1 Introduction\n")
|
||||||
doc.append(
|
file.write(
|
||||||
f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics "
|
f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics "
|
||||||
f"were captured to be referenced during system-level testing and user acceptance testing (UAT)."
|
f"were captured to be referenced during system-level testing and user acceptance testing (UAT).\n"
|
||||||
)
|
)
|
||||||
doc.append(
|
file.write(
|
||||||
f"\nThe benchmarking process consists of running {sessions} training session using the same "
|
f"The benchmarking process consists of running {sessions} training session using the same "
|
||||||
f"config file. Each session trains an agent for {episodes} episodes, "
|
f"config file. Each session trains an agent for {episodes} episodes, "
|
||||||
f"with each episode consisting of {steps} steps."
|
f"with each episode consisting of {steps} steps.\n"
|
||||||
)
|
)
|
||||||
doc.append(
|
file.write(
|
||||||
f"\nThe total reward per episode from each session is captured. This is then used to calculate an "
|
f"The total reward per episode from each session is captured. This is then used to calculate an "
|
||||||
f"caverage total reward per episode from the {sessions} individual sessions for smoothing. "
|
f"caverage total reward per episode from the {sessions} individual sessions for smoothing. "
|
||||||
f"Finally, a 25-widow rolling average of the average total reward per session is calculated for "
|
f"Finally, a 25-widow rolling average of the average total reward per session is calculated for "
|
||||||
f"further smoothing."
|
f"further smoothing.\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
with doc.create(Section("System Information")):
|
file.write("## 2 System Information\n")
|
||||||
with doc.create(Subsection("Python")):
|
i = 1
|
||||||
with doc.create(Tabular("|l|l|")) as table:
|
file.write(f"### 2.{i} Python\n")
|
||||||
table.add_hline()
|
file.write(f"**Version:** {sys.version}\n")
|
||||||
table.add_row((bold("Version"), sys.version))
|
|
||||||
table.add_hline()
|
|
||||||
for section, section_data in data["system_info"].items():
|
for section, section_data in data["system_info"].items():
|
||||||
|
i += 1
|
||||||
if section_data:
|
if section_data:
|
||||||
with doc.create(Subsection(section)):
|
file.write(f"### 2.{i} {section}\n")
|
||||||
if isinstance(section_data, dict):
|
if isinstance(section_data, dict):
|
||||||
with doc.create(Tabular("|l|l|")) as table:
|
for key, value in section_data.items():
|
||||||
table.add_hline()
|
file.write(f"- **{key}:** {value}\n")
|
||||||
for key, value in section_data.items():
|
|
||||||
table.add_row((bold(key), value))
|
|
||||||
table.add_hline()
|
|
||||||
elif isinstance(section_data, list):
|
|
||||||
headers = section_data[0].keys()
|
|
||||||
tabs_str = "|".join(["l" for _ in range(len(headers))])
|
|
||||||
tabs_str = f"|{tabs_str}|"
|
|
||||||
with doc.create(Tabular(tabs_str)) as table:
|
|
||||||
table.add_hline()
|
|
||||||
table.add_row([bold(h) for h in headers])
|
|
||||||
table.add_hline()
|
|
||||||
for item in section_data:
|
|
||||||
table.add_row(item.values())
|
|
||||||
table.add_hline()
|
|
||||||
|
|
||||||
headers_map = {
|
headers_map = {
|
||||||
"total_sessions": "Total Sessions",
|
"total_sessions": "Total Sessions",
|
||||||
"total_episodes": "Total Episodes",
|
"total_episodes": "Total Episodes",
|
||||||
"total_time_steps": "Total Steps",
|
"total_time_steps": "Total Steps",
|
||||||
"av_s_per_session": "Av Session Duration (s)",
|
"av_s_per_session": "Av Session Duration (s)",
|
||||||
"av_s_per_step": "Av Step Duration (s)",
|
"av_s_per_step": "Av Step Duration (s)",
|
||||||
"av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
|
"av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
|
||||||
}
|
}
|
||||||
with doc.create(Section("Stats")):
|
|
||||||
with doc.create(Subsection("Benchmark Results")):
|
|
||||||
with doc.create(Tabular("|l|l|")) as table:
|
|
||||||
table.add_hline()
|
|
||||||
for section, header in headers_map.items():
|
|
||||||
if section.startswith("av_"):
|
|
||||||
table.add_row((bold(header), f"{data[section]:.4f}"))
|
|
||||||
else:
|
|
||||||
table.add_row((bold(header), data[section]))
|
|
||||||
table.add_hline()
|
|
||||||
|
|
||||||
with doc.create(Section("Graphs")):
|
file.write("## 3 Stats\n")
|
||||||
with doc.create(Subsection(f"v{primaite_version} Learning Benchmark Plot")):
|
for section, header in headers_map.items():
|
||||||
with doc.create(LatexFigure(position="h!")) as pic:
|
if section.startswith("av_"):
|
||||||
pic.add_image(str(this_version_plot_path))
|
file.write(f"- **{header}:** {data[section]:.4f}\n")
|
||||||
pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot")
|
else:
|
||||||
|
file.write(f"- **{header}:** {data[section]}\n")
|
||||||
|
|
||||||
with doc.create(Subsection(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")):
|
file.write("## 4 Graphs\n")
|
||||||
with doc.create(LatexFigure(position="h!")) as pic:
|
|
||||||
pic.add_image(str(all_version_plot_path))
|
|
||||||
pic.add_caption(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")
|
|
||||||
|
|
||||||
doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True)
|
file.write(f"### 4.1 v{primaite_version} Learning Benchmark Plot\n")
|
||||||
|
file.write(f"\n")
|
||||||
|
|
||||||
|
file.write(f"### 4.2 Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}\n")
|
||||||
|
file.write(
|
||||||
|
f"![Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}]"
|
||||||
|
f"({all_version_plot_path.name})\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
file.write(f"### 4.3 Performance of Minor and Bugfix Releases for Major Version {major_v}\n")
|
||||||
|
file.write(
|
||||||
|
f"![Performance of Minor and Bugfix Releases for Major Version {major_v}]"
|
||||||
|
f"({performance_benchmark_plot_path.name})\n"
|
||||||
|
)
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 91 KiB |
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 295 KiB |
@@ -1,4 +1,4 @@
|
|||||||
# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
|
# © Crown-owned copyright 2024, Defence Science and Technology Laboratory UK
|
||||||
import platform
|
import platform
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user