Files
PrimAITE/benchmark/report.py
2024-08-07 10:07:19 +01:00

421 lines
16 KiB
Python

# © Crown-owned copyright 2024, Defence Science and Technology Laboratory UK
import json
import sys
from datetime import datetime
from os import PathLike
from pathlib import Path
from typing import Dict, Optional
import plotly.graph_objects as go
import polars as pl
import yaml
from plotly.graph_objs import Figure
from utils import _get_system_info
import primaite
# Plotly settings shared by the figure builders in this module: a fixed 800x800 canvas and the
# "plotly_white" template. ``range_slider`` is not read by any code visible in this file.
PLOT_CONFIG = dict(
    size=dict(auto_size=False, width=800, height=800),
    template="plotly_white",
    range_slider=False,
)
def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict, config: Dict) -> dict:
    """
    Constructs a dictionary aggregating benchmark results from multiple sessions.

    :param start_datetime: The datetime when the benchmarking started.
    :param metadata_dict: Dictionary containing metadata for each session, keyed by session identifier. Each
        value must provide ``total_episodes``, ``total_time_steps``, ``total_s``, ``s_per_step``,
        ``s_per_100_steps_10_nodes`` and ``total_reward_per_episode``.
    :param config: Configuration settings used during the benchmarking.
    :return: A dictionary containing aggregated data and metadata from the benchmarking sessions.
    :raises ValueError: If ``metadata_dict`` contains no sessions (there is nothing to average).
    :raises KeyError: If a session lacks one of the required keys, or lacks a reward entry for an episode that
        the first session recorded.
    """
    if not metadata_dict:
        # Fail with a clear message instead of a bare ZeroDivisionError from the averages below.
        raise ValueError("metadata_dict must contain at least one session to build benchmark results")

    sessions = list(metadata_dict.values())
    num_sessions = len(sessions)

    def _total(key: str):
        """Sum a per-session statistic across all sessions."""
        return sum(session[key] for session in sessions)

    averaged_data = {
        "start_timestamp": start_datetime.isoformat(),
        "end_datetime": datetime.now().isoformat(),
        "primaite_version": primaite.__version__,
        "system_info": _get_system_info(),
        "total_sessions": num_sessions,
        "total_episodes": _total("total_episodes"),
        "total_time_steps": _total("total_time_steps"),
        "av_s_per_session": _total("total_s") / num_sessions,
        "av_s_per_step": _total("s_per_step") / num_sessions,
        "av_s_per_100_steps_10_nodes": _total("s_per_100_steps_10_nodes") / num_sessions,
        "combined_total_reward_per_episode": {},
        "session_total_reward_per_episode": {k: v["total_reward_per_episode"] for k, v in metadata_dict.items()},
        "config": config,
    }

    # Find the average of each episode across all sessions. The episode keys are taken from the first
    # session in the mapping rather than from a hard-coded session key, so any session numbering works.
    episodes = sessions[0]["total_reward_per_episode"]
    averaged_data["combined_total_reward_per_episode"] = {
        episode: sum(session["total_reward_per_episode"][episode] for session in sessions) / num_sessions
        for episode in episodes
    }
    return averaged_data
def _get_df_from_episode_av_reward_dict(data: Dict) -> pl.DataFrame:
    """
    Build a Polars DataFrame from a mapping of episode to average reward.

    :param data: Dictionary with episodes as keys and average rewards as values.
    :return: Polars DataFrame with ``episode`` and ``av_reward`` columns plus ``rolling_av_reward``, a rolling
        mean of ``av_reward`` over a window of 25 rows.
    """
    columns = {"episode": data.keys(), "av_reward": data.values()}
    # Name the rolling-mean expression directly instead of creating it under a temporary name and renaming.
    rolling_av_reward = pl.col("av_reward").rolling_mean(window_size=25).alias("rolling_av_reward")
    frame = pl.from_dict(columns)
    return frame.with_columns(rolling_av_reward)
def _plot_benchmark_metadata(
    benchmark_metadata_dict: Dict,
    title: Optional[str] = None,
    subtitle: Optional[str] = None,
) -> Figure:
    """
    Plots benchmark metadata as a line graph using Plotly.

    One faint grey line is drawn per session, followed by the combined session average and the rolling
    average of that combined average. Only the first session line appears in the legend, labelled
    "Individual Sessions", so the legend is not flooded with one entry per session.

    :param benchmark_metadata_dict: Dictionary containing ``session_total_reward_per_episode`` (per-session
        mapping of episode to total reward) and ``combined_total_reward_per_episode``.
    :param title: Optional title for the graph.
    :param subtitle: Optional subtitle for the graph. Rendered in superscript on a second line when a title
        is also given; used as the title when no title is given.
    :return: Plotly figure object representing the benchmark metadata plot.
    """
    if title and subtitle:
        # The opening <sup> tag was previously missing, leaving an unbalanced </sup> in the title markup.
        title = f"{title} <br><sup>{subtitle}</sup>"
    elif subtitle:
        title = subtitle

    layout = go.Layout(
        autosize=PLOT_CONFIG["size"]["auto_size"],
        width=PLOT_CONFIG["size"]["width"],
        height=PLOT_CONFIG["size"]["height"],
    )
    # Create the line graph with a colored line
    fig = go.Figure(layout=layout)
    fig.update_layout(template=PLOT_CONFIG["template"])

    session_rewards = benchmark_metadata_dict["session_total_reward_per_episode"]
    for index, (session, av_reward_dict) in enumerate(session_rewards.items()):
        df = _get_df_from_episode_av_reward_dict(av_reward_dict)
        # Decide the legend entry while building the trace. Relabelling fig["data"][0] afterwards would
        # rename the combined-average trace whenever there are no session traces.
        is_first_session = index == 0
        fig.add_trace(
            go.Scatter(
                x=df["episode"],
                y=df["av_reward"],
                mode="lines",
                name="Individual Sessions" if is_first_session else f"Session {session}",
                showlegend=is_first_session,
                opacity=0.25,
                line={"color": "#a6a6a6"},
            )
        )

    df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_total_reward_per_episode"])
    fig.add_trace(
        go.Scatter(
            x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"}
        )
    )
    fig.add_trace(
        go.Scatter(
            x=df["episode"],
            y=df["rolling_av_reward"],
            mode="lines",
            name="Rolling Av (Combined Session Av)",
            line={"color": "#4CBB17"},
        )
    )

    # Set the layout of the graph, with a semi-transparent legend in the top-left corner of the plot
    fig.update_layout(
        xaxis={
            "title": "Episode",
            "type": "linear",
        },
        yaxis={"title": "Total Reward"},
        title=title,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01,
            bgcolor="rgba(255,255,255,0.3)",
        ),
    )
    return fig
def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
    """
    Plot the Benchmark results for each released version of PrimAITE.

    Does this by iterating over the ``benchmark/results`` directory and
    extracting the benchmark metadata json for each version that has been
    benchmarked. The combined_total_reward_per_episode is extracted from each,
    converted into a polars dataframe, and plotted as a scatter line in plotly.

    :param results_directory: Directory whose sub-directories each hold a
        ``<dir name>_benchmark_metadata.json`` file. Entries that are not directories are ignored.
    :return: Plotly figure with one rolling-average line per sub-directory, named after the sub-directory.
    :raises FileNotFoundError: If a sub-directory does not contain its metadata json file.
    """
    import re  # local import: only needed for the version-aware sort key below

    def _natural_key(path: Path) -> list:
        """Sort key that compares digit runs numerically so that v3.10.0 follows v3.2.0."""
        # re.split with a capture group alternates text (even index) and digit runs (odd index).
        parts = re.split(r"([0-9]+)", path.name)
        return [int(part) if index % 2 else part for index, part in enumerate(parts)]

    major_v = primaite.__version__.split(".")[0]
    title = f"Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}"
    subtitle = "Rolling Av (Combined Session Av)"
    # The opening <sup> tag was previously missing, leaving an unbalanced </sup> in the title markup.
    title = f"{title} <br><sup>{subtitle}</sup>"

    layout = go.Layout(
        autosize=PLOT_CONFIG["size"]["auto_size"],
        width=PLOT_CONFIG["size"]["width"],
        height=PLOT_CONFIG["size"]["height"],
    )
    # Create the line graph with a colored line
    fig = go.Figure(layout=layout)
    fig.update_layout(template=PLOT_CONFIG["template"])

    # iterdir() yields entries in arbitrary order; sort so trace (and therefore legend and colour)
    # order is deterministic and follows the version numbering.
    version_dirs = sorted((entry for entry in results_directory.iterdir() if entry.is_dir()), key=_natural_key)
    for version_dir in version_dirs:
        metadata_file = version_dir / f"{version_dir.name}_benchmark_metadata.json"
        with open(metadata_file, "r") as file:
            metadata_dict = json.load(file)
        df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_total_reward_per_episode"])
        fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=version_dir.name))

    # Set the layout of the graph, with a horizontal legend placed below the plot area
    fig.update_layout(
        xaxis={
            "title": "Episode",
            "type": "linear",
        },
        yaxis={"title": "Total Reward"},
        title=title,
        legend=dict(yanchor="top", y=-0.2, xanchor="left", x=0.01, orientation="h"),
    )
    # Force the legend entry on for the first trace; guarded so an empty results directory yields an
    # empty figure instead of an IndexError.
    if fig.data:
        fig.data[0].showlegend = True
    return fig
def _get_performance_benchmark_for_all_version_dict(results_directory: Path) -> Dict[str, float]:
    """
    Collect the performance benchmark of every benchmarked version found under a results directory.

    Each sub-directory of ``results_directory`` is expected to contain a file named
    ``<sub-directory name>_benchmark_metadata.json``. Its ``primaite_version`` value becomes a key of the
    result and its ``av_s_per_100_steps_10_nodes`` value the corresponding value. Entries that are not
    directories are ignored.

    :param results_directory: The directory containing subdirectories for each version's benchmark data.
    :return: A dictionary with version numbers as keys and their corresponding average performance benchmark
        (average time per 100 steps on 10 nodes) as values.
    """
    benchmarks_by_version = {}
    for entry in results_directory.iterdir():
        if not entry.is_dir():
            continue
        metadata_path = entry / f"{entry.name}_benchmark_metadata.json"
        with metadata_path.open("r") as handle:
            metadata = json.load(handle)
        version = metadata["primaite_version"]
        benchmarks_by_version[version] = metadata["av_s_per_100_steps_10_nodes"]
    return benchmarks_by_version
def _plot_av_s_per_100_steps_10_nodes(
    version_times_dict: Dict[str, float],
) -> Figure:
    """
    Creates a bar chart visualising the performance of each version of PrimAITE.

    Performance is based on the average training time per 100 steps on 10 nodes. Bars are ordered by
    version with numeric components compared as numbers, so "3.10.0" is placed after "3.2.0".

    :param version_times_dict: A dictionary with software versions as keys and average times as values.
    :return: A Plotly figure object representing the bar chart of the performance metrics.
    """
    import re  # local import: only needed for the version-aware sort key below

    def _natural_key(version: str) -> list:
        """Sort key that compares digit runs numerically rather than character by character."""
        # re.split with a capture group alternates text (even index) and digit runs (odd index).
        parts = re.split(r"([0-9]+)", version)
        return [int(part) if index % 2 else part for index, part in enumerate(parts)]

    major_v = primaite.__version__.split(".")[0]
    title = f"Performance of Minor and Bugfix Releases for Major Version {major_v}"
    subtitle = "Average Training Time per 100 Steps on 10 Nodes "
    title = f"{title} <br><sub>{subtitle}</sub>"

    layout = go.Layout(
        autosize=PLOT_CONFIG["size"]["auto_size"],
        width=PLOT_CONFIG["size"]["width"],
        height=PLOT_CONFIG["size"]["height"],
    )
    fig = go.Figure(layout=layout)
    fig.update_layout(template=PLOT_CONFIG["template"])

    # A plain string sort would place "3.10.0" before "3.2.0"; sort on the numeric components instead.
    versions = sorted(version_times_dict, key=_natural_key)
    times = [version_times_dict[version] for version in versions]

    # Bar labels show the time to three decimal places
    fig.add_trace(go.Bar(x=versions, y=times, text=times, textposition="auto", texttemplate="%{y:.3f}"))
    fig.update_layout(
        xaxis_title="PrimAITE Version",
        yaxis_title="Avg Time per 100 Steps on 10 Nodes (seconds)",
        title=title,
    )
    return fig
def build_benchmark_md_report(
    benchmark_start_time: datetime,
    session_metadata: Dict,
    config_path: Path,
    results_root_path: Path,
    output_path: PathLike,
) -> None:
    """
    Generates a Markdown report for a benchmarking session, documenting performance metrics and graphs.

    This function orchestrates the creation of several graphs depicting various performance benchmarks and aggregates
    them into a markdown document that includes comprehensive system and benchmark information.

    Files written under ``results_root_path / "v<primaite version>"``: the benchmark metadata json and three
    png plots. The Markdown report itself is written to ``output_path`` and refers to the plots by file name
    only.

    :param benchmark_start_time: The datetime object representing when the benchmarking process was initiated.
    :param session_metadata: A dictionary containing metadata for each benchmarking session.
    :param config_path: A pathlib.Path object pointing to the configuration file used for the benchmark sessions.
    :param results_root_path: A pathlib.Path object pointing to the directory where the results and graphs should be
        saved.
    :param output_path: Path of the Markdown report file to write.
    """
    # generate report folder
    v_str = f"v{primaite.__version__}"
    major_v = primaite.__version__.split(".")[0]
    version_result_dir = results_root_path / v_str
    version_result_dir.mkdir(exist_ok=True, parents=True)

    # load the config file as dict
    with open(config_path, "r") as f:
        cfg_data = yaml.safe_load(f)

    # generate the benchmark metadata dict
    benchmark_metadata_dict = _build_benchmark_results_dict(
        start_datetime=benchmark_start_time, metadata_dict=session_metadata, config=cfg_data
    )

    # The metadata json must be on disk before the cross-version plots are built: they read the metadata
    # file of every version directory under results_root_path, including the one created above.
    with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file:
        json.dump(benchmark_metadata_dict, file, indent=4)

    title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark"
    fig = _plot_benchmark_metadata(benchmark_metadata_dict, title=title)
    this_version_plot_path = version_result_dir / f"{title}.png"
    fig.write_image(this_version_plot_path)

    fig = _plot_all_benchmarks_combined_session_av(results_directory=results_root_path)
    filename = f"PrimAITE Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}.png"
    all_version_plot_path = version_result_dir / filename
    fig.write_image(all_version_plot_path)

    performance_benchmark_dict = _get_performance_benchmark_for_all_version_dict(results_directory=results_root_path)
    fig = _plot_av_s_per_100_steps_10_nodes(performance_benchmark_dict)
    filename = f"PrimAITE Performance of Minor and Bugfix Releases for Major Version {major_v}.png"
    performance_benchmark_plot_path = version_result_dir / filename
    fig.write_image(performance_benchmark_plot_path)

    data = benchmark_metadata_dict
    primaite_version = data["primaite_version"]

    sessions = data["total_sessions"]
    # Read the episode count from the first session instead of assuming a session keyed 1 exists.
    # NOTE(review): the "- 1" is carried over unchanged; presumably total_episodes counts one more than
    # the number of training episodes - confirm against the code that produces the session metadata.
    first_session = next(iter(session_metadata.values()))
    episodes = first_session["total_episodes"] - 1
    steps = data["config"]["game"]["max_episode_length"]
    session_noun = "session" if sessions == 1 else "sessions"

    with open(output_path, "w") as file:
        # Title
        file.write(f"# PrimAITE v{primaite_version} Learning Benchmark\n")
        file.write("## PrimAITE Dev Team\n")
        file.write(f"### {datetime.now().date()}\n")
        file.write("\n---\n")

        # Body
        file.write("## 1 Introduction\n")
        file.write(
            f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics "
            "were captured to be referenced during system-level testing and user acceptance testing (UAT).\n"
        )
        file.write(
            f"The benchmarking process consists of running {sessions} training {session_noun} using the same "
            f"config file. Each session trains an agent for {episodes} episodes, "
            f"with each episode consisting of {steps} steps.\n"
        )
        file.write(
            "The total reward per episode from each session is captured. This is then used to calculate an "
            f"average total reward per episode from the {sessions} individual sessions for smoothing. "
            "Finally, a 25-window rolling average of the average total reward per session is calculated for "
            "further smoothing.\n"
        )

        file.write("## 2 System Information\n")
        subsection = 1
        file.write(f"### 2.{subsection} Python\n")
        file.write(f"**Version:** {sys.version}\n")
        for section, section_data in data["system_info"].items():
            if not section_data:
                continue
            # Advance the counter only for sections that are actually written, so the numbering
            # has no gaps when an empty section is skipped.
            subsection += 1
            file.write(f"### 2.{subsection} {section}\n")
            if isinstance(section_data, dict):
                for key, value in section_data.items():
                    file.write(f"- **{key}:** {value}\n")

        headers_map = {
            "total_sessions": "Total Sessions",
            "total_episodes": "Total Episodes",
            "total_time_steps": "Total Steps",
            "av_s_per_session": "Av Session Duration (s)",
            "av_s_per_step": "Av Step Duration (s)",
            "av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
        }
        file.write("## 3 Stats\n")
        for section, header in headers_map.items():
            if section.startswith("av_"):
                # Averages are floats; totals are written as-is
                file.write(f"- **{header}:** {data[section]:.4f}\n")
            else:
                file.write(f"- **{header}:** {data[section]}\n")

        file.write("## 4 Graphs\n")
        file.write(f"### 4.1 v{primaite_version} Learning Benchmark Plot\n")
        file.write(f"![PrimAITE {primaite_version} Learning Benchmark Plot]({this_version_plot_path.name})\n")
        file.write(f"### 4.2 Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}\n")
        file.write(
            f"![Learning Benchmark of Minor and Bugfix Releases for Major Version {major_v}]"
            f"({all_version_plot_path.name})\n"
        )
        file.write(f"### 4.3 Performance of Minor and Bugfix Releases for Major Version {major_v}\n")
        file.write(
            f"![Performance of Minor and Bugfix Releases for Major Version {major_v}]"
            f"({performance_benchmark_plot_path.name})\n"
        )
def md2pdf(md_path: PathLike, pdf_path: PathLike, css_path: PathLike) -> None:
    """
    Generate PDF version of Markdown report.

    :param md_path: Path of the Markdown file to convert.
    :param pdf_path: Path of the PDF file to produce.
    :param css_path: Path of the CSS file passed to the converter.
    """
    # Imported here rather than at module level, and under an alias because the library function has
    # the same name as this wrapper.
    from md2pdf.core import md2pdf as render_pdf

    # The Markdown file's own directory is handed over as the base URL (presumably so relative image
    # links in the report resolve - confirm against the md2pdf documentation).
    report_dir = Path(md_path).parent
    render_pdf(
        pdf_file_path=pdf_path,
        md_file_path=md_path,
        base_url=report_dir,
        css_file_path=css_path,
    )