#2628: temp commit
This commit is contained in:
@@ -1,37 +1,19 @@
|
||||
# flake8: noqa
|
||||
raise DeprecationWarning(
|
||||
"Benchmarking depends on deprecated functionality and it has not been updated to primaite v3 yet."
|
||||
)
|
||||
# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
|
||||
import json
|
||||
import platform
|
||||
import shutil
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Final, Optional, Tuple, Union
|
||||
from unittest.mock import patch
|
||||
from typing import Any, Dict, Final, Tuple
|
||||
|
||||
import GPUtil
|
||||
import plotly.graph_objects as go
|
||||
import polars as pl
|
||||
import psutil
|
||||
import yaml
|
||||
from plotly.graph_objs import Figure
|
||||
from pylatex import Command, Document
|
||||
from pylatex import Figure as LatexFigure
|
||||
from pylatex import Section, Subsection, Tabular
|
||||
from pylatex.utils import bold
|
||||
from stable_baselines3 import PPO
|
||||
|
||||
import primaite
|
||||
from primaite.config.lay_down_config import data_manipulation_config_path
|
||||
from primaite.data_viz.session_plots import get_plotly_config
|
||||
from primaite.environment.primaite_env import Primaite
|
||||
from primaite.primaite_session import PrimaiteSession
|
||||
from benchmark.utils.benchmark import BenchmarkPrimaiteGymEnv
|
||||
from benchmark.utils.report import build_benchmark_latex_report
|
||||
from primaite.config.load import data_manipulation_config_path
|
||||
|
||||
_LOGGER = primaite.getLogger(__name__)
|
||||
|
||||
_BENCHMARK_ROOT = Path(__file__).parent
|
||||
_BENCHMARK_ROOT = Path(__file__).parent / "benchmark_session"
|
||||
_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results"
|
||||
_RESULTS_ROOT.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
@@ -41,171 +23,51 @@ if _OUTPUT_ROOT.exists():
|
||||
shutil.rmtree(_OUTPUT_ROOT)
|
||||
_OUTPUT_ROOT.mkdir()
|
||||
|
||||
_TRAINING_CONFIG_PATH = _BENCHMARK_ROOT / "config" / "benchmark_training_config.yaml"
|
||||
_LAY_DOWN_CONFIG_PATH = data_manipulation_config_path()
|
||||
|
||||
class BenchmarkSession:
|
||||
"""Benchmark Session class."""
|
||||
|
||||
def get_size(size_bytes: int) -> str:
|
||||
"""
|
||||
Scale bytes to its proper format.
|
||||
gym_env: BenchmarkPrimaiteGymEnv
|
||||
"""Gym environment used by the session to train."""
|
||||
|
||||
e.g:
|
||||
1253656 => '1.20MB'
|
||||
1253656678 => '1.17GB'
|
||||
num_episodes: int
|
||||
"""Number of episodes to run the training session."""
|
||||
|
||||
:
|
||||
"""
|
||||
factor = 1024
|
||||
for unit in ["", "K", "M", "G", "T", "P"]:
|
||||
if size_bytes < factor:
|
||||
return f"{size_bytes:.2f}{unit}B"
|
||||
size_bytes /= factor
|
||||
batch_size: int
|
||||
"""Number of steps for each episode."""
|
||||
|
||||
start_time: datetime
|
||||
"""Start time for the session."""
|
||||
|
||||
def _get_system_info() -> Dict:
|
||||
"""Builds and returns a dict containing system info."""
|
||||
uname = platform.uname()
|
||||
cpu_freq = psutil.cpu_freq()
|
||||
virtual_mem = psutil.virtual_memory()
|
||||
swap_mem = psutil.swap_memory()
|
||||
gpus = GPUtil.getGPUs()
|
||||
return {
|
||||
"System": {
|
||||
"OS": uname.system,
|
||||
"OS Version": uname.version,
|
||||
"Machine": uname.machine,
|
||||
"Processor": uname.processor,
|
||||
},
|
||||
"CPU": {
|
||||
"Physical Cores": psutil.cpu_count(logical=False),
|
||||
"Total Cores": psutil.cpu_count(logical=True),
|
||||
"Max Frequency": f"{cpu_freq.max:.2f}Mhz",
|
||||
},
|
||||
"Memory": {"Total": get_size(virtual_mem.total), "Swap Total": get_size(swap_mem.total)},
|
||||
"GPU": [{"Name": gpu.name, "Total Memory": f"{gpu.memoryTotal}MB"} for gpu in gpus],
|
||||
}
|
||||
end_time: datetime
|
||||
"""End time for the session."""
|
||||
|
||||
session_metadata: Dict
|
||||
"""Dict containing the metadata for the session - used to generate benchmark report."""
|
||||
|
||||
def _build_benchmark_latex_report(
|
||||
benchmark_metadata_dict: Dict, this_version_plot_path: Path, all_version_plot_path: Path
|
||||
) -> None:
|
||||
geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"}
|
||||
data = benchmark_metadata_dict
|
||||
primaite_version = data["primaite_version"]
|
||||
def __init__(self, gym_env: BenchmarkPrimaiteGymEnv, num_episodes: int, batch_size: int):
|
||||
"""Initialise the BenchmarkSession."""
|
||||
self.gym_env = gym_env
|
||||
self.num_episodes = num_episodes
|
||||
self.batch_size = batch_size
|
||||
|
||||
# Create a new document
|
||||
doc = Document("report", geometry_options=geometry_options)
|
||||
# Title
|
||||
doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark"))
|
||||
doc.preamble.append(Command("author", "PrimAITE Dev Team"))
|
||||
doc.preamble.append(Command("date", datetime.now().date()))
|
||||
doc.append(Command("maketitle"))
|
||||
def train(self):
|
||||
"""Run the training session."""
|
||||
# start timer for session
|
||||
self.start_time = datetime.now()
|
||||
|
||||
sessions = data["total_sessions"]
|
||||
episodes = data["training_config"]["num_train_episodes"]
|
||||
steps = data["training_config"]["num_train_steps"]
|
||||
|
||||
# Body
|
||||
with doc.create(Section("Introduction")):
|
||||
doc.append(
|
||||
f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics "
|
||||
f"were captured to be referenced during system-level testing and user acceptance testing (UAT)."
|
||||
)
|
||||
doc.append(
|
||||
f"\nThe benchmarking process consists of running {sessions} training session using the same "
|
||||
f"training and lay down config files. Each session trains an agent for {episodes} episodes, "
|
||||
f"with each episode consisting of {steps} steps."
|
||||
)
|
||||
doc.append(
|
||||
f"\nThe mean reward per episode from each session is captured. This is then used to calculate a "
|
||||
f"combined average reward per episode from the {sessions} individual sessions for smoothing. "
|
||||
f"Finally, a 25-widow rolling average of the combined average reward per session is calculated for "
|
||||
f"further smoothing."
|
||||
model = PPO(
|
||||
policy="MlpPolicy",
|
||||
env=self.gym_env,
|
||||
batch_size=self.batch_size,
|
||||
n_steps=self.batch_size * self.num_episodes,
|
||||
)
|
||||
model.learn(total_timesteps=self.batch_size * self.num_episodes)
|
||||
|
||||
with doc.create(Section("System Information")):
|
||||
with doc.create(Subsection("Python")):
|
||||
with doc.create(Tabular("|l|l|")) as table:
|
||||
table.add_hline()
|
||||
table.add_row((bold("Version"), sys.version))
|
||||
table.add_hline()
|
||||
for section, section_data in data["system_info"].items():
|
||||
if section_data:
|
||||
with doc.create(Subsection(section)):
|
||||
if isinstance(section_data, dict):
|
||||
with doc.create(Tabular("|l|l|")) as table:
|
||||
table.add_hline()
|
||||
for key, value in section_data.items():
|
||||
table.add_row((bold(key), value))
|
||||
table.add_hline()
|
||||
elif isinstance(section_data, list):
|
||||
headers = section_data[0].keys()
|
||||
tabs_str = "|".join(["l" for _ in range(len(headers))])
|
||||
tabs_str = f"|{tabs_str}|"
|
||||
with doc.create(Tabular(tabs_str)) as table:
|
||||
table.add_hline()
|
||||
table.add_row([bold(h) for h in headers])
|
||||
table.add_hline()
|
||||
for item in section_data:
|
||||
table.add_row(item.values())
|
||||
table.add_hline()
|
||||
# end timer for session
|
||||
self.end_time = datetime.now()
|
||||
|
||||
headers_map = {
|
||||
"total_sessions": "Total Sessions",
|
||||
"total_episodes": "Total Episodes",
|
||||
"total_time_steps": "Total Steps",
|
||||
"av_s_per_session": "Av Session Duration (s)",
|
||||
"av_s_per_step": "Av Step Duration (s)",
|
||||
"av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
|
||||
}
|
||||
with doc.create(Section("Stats")):
|
||||
with doc.create(Subsection("Benchmark Results")):
|
||||
with doc.create(Tabular("|l|l|")) as table:
|
||||
table.add_hline()
|
||||
for section, header in headers_map.items():
|
||||
if section.startswith("av_"):
|
||||
table.add_row((bold(header), f"{data[section]:.4f}"))
|
||||
else:
|
||||
table.add_row((bold(header), data[section]))
|
||||
table.add_hline()
|
||||
|
||||
with doc.create(Section("Graphs")):
|
||||
with doc.create(Subsection(f"PrimAITE {primaite_version} Learning Benchmark Plot")):
|
||||
with doc.create(LatexFigure(position="h!")) as pic:
|
||||
pic.add_image(str(this_version_plot_path))
|
||||
pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot")
|
||||
|
||||
with doc.create(Subsection("PrimAITE All Versions Learning Benchmark Plot")):
|
||||
with doc.create(LatexFigure(position="h!")) as pic:
|
||||
pic.add_image(str(all_version_plot_path))
|
||||
pic.add_caption("PrimAITE All Versions Learning Benchmark Plot")
|
||||
|
||||
doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True)
|
||||
|
||||
|
||||
class BenchmarkPrimaiteSession(PrimaiteSession):
|
||||
"""A benchmarking primaite session."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
training_config_path: Union[str, Path],
|
||||
lay_down_config_path: Union[str, Path],
|
||||
) -> None:
|
||||
super().__init__(training_config_path, lay_down_config_path)
|
||||
self.setup()
|
||||
|
||||
@property
|
||||
def env(self) -> Primaite:
|
||||
"""Direct access to the env for ease of testing."""
|
||||
return self._agent_session._env # noqa
|
||||
|
||||
def __enter__(self) -> "BenchmarkPrimaiteSession":
|
||||
return self
|
||||
|
||||
# TODO: typehints uncertain
|
||||
def __exit__(self, type: Any, value: Any, tb: Any) -> None:
|
||||
shutil.rmtree(self.session_path)
|
||||
_LOGGER.debug(f"Deleted benchmark session directory: {self.session_path}")
|
||||
self.session_metadata = self.generate_learn_metadata_dict()
|
||||
|
||||
def _learn_benchmark_durations(self) -> Tuple[float, float, float]:
|
||||
"""
|
||||
@@ -219,235 +81,78 @@ class BenchmarkPrimaiteSession(PrimaiteSession):
|
||||
:return: The learning benchmark durations as a Tuple of three floats:
|
||||
Tuple[total_s, s_per_step, s_per_100_steps_10_nodes].
|
||||
"""
|
||||
data = self.metadata_file_as_dict()
|
||||
start_dt = datetime.fromisoformat(data["start_datetime"])
|
||||
end_dt = datetime.fromisoformat(data["end_datetime"])
|
||||
delta = end_dt - start_dt
|
||||
delta = self.end_time - self.start_time
|
||||
total_s = delta.total_seconds()
|
||||
|
||||
total_steps = data["learning"]["total_time_steps"]
|
||||
total_steps = self.batch_size * self.num_episodes
|
||||
s_per_step = total_s / total_steps
|
||||
|
||||
num_nodes = self.env.num_nodes
|
||||
num_nodes = len(self.gym_env.game.simulation.network.nodes)
|
||||
num_intervals = total_steps / 100
|
||||
av_interval_time = total_s / num_intervals
|
||||
s_per_100_steps_10_nodes = av_interval_time / (num_nodes / 10)
|
||||
|
||||
return total_s, s_per_step, s_per_100_steps_10_nodes
|
||||
|
||||
def learn_metadata_dict(self) -> Dict[str, Any]:
|
||||
def generate_learn_metadata_dict(self) -> Dict[str, Any]:
|
||||
"""Metadata specific to the learning session."""
|
||||
total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations()
|
||||
self.gym_env.average_reward_per_episode.pop(0) # remove episode 0
|
||||
return {
|
||||
"total_episodes": self.env.actual_episode_count,
|
||||
"total_time_steps": self.env.total_step_count,
|
||||
"total_episodes": self.gym_env.episode_counter,
|
||||
"total_time_steps": self.gym_env.total_time_steps,
|
||||
"total_s": total_s,
|
||||
"s_per_step": s_per_step,
|
||||
"s_per_100_steps_10_nodes": s_per_100_steps_10_nodes,
|
||||
"av_reward_per_episode": self.learn_av_reward_per_episode_dict(),
|
||||
"av_reward_per_episode": self.gym_env.average_reward_per_episode,
|
||||
}
|
||||
|
||||
|
||||
def _get_benchmark_session_path(session_timestamp: datetime) -> Path:
|
||||
return _OUTPUT_ROOT / session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
|
||||
|
||||
|
||||
def _get_benchmark_primaite_session() -> BenchmarkPrimaiteSession:
|
||||
with patch("primaite.agents.agent_abc.get_session_path", _get_benchmark_session_path) as mck:
|
||||
mck.session_timestamp = datetime.now()
|
||||
return BenchmarkPrimaiteSession(_TRAINING_CONFIG_PATH, _LAY_DOWN_CONFIG_PATH)
|
||||
|
||||
|
||||
def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict) -> dict:
|
||||
n = len(metadata_dict)
|
||||
with open(_TRAINING_CONFIG_PATH, "r") as file:
|
||||
training_config_dict = yaml.safe_load(file)
|
||||
with open(_LAY_DOWN_CONFIG_PATH, "r") as file:
|
||||
lay_down_config_dict = yaml.safe_load(file)
|
||||
averaged_data = {
|
||||
"start_timestamp": start_datetime.isoformat(),
|
||||
"end_datetime": datetime.now().isoformat(),
|
||||
"primaite_version": primaite.__version__,
|
||||
"system_info": _get_system_info(),
|
||||
"total_sessions": n,
|
||||
"total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()),
|
||||
"total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()),
|
||||
"av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / n,
|
||||
"av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / n,
|
||||
"av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) / n,
|
||||
"combined_av_reward_per_episode": {},
|
||||
"session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()},
|
||||
"training_config": training_config_dict,
|
||||
"lay_down_config": lay_down_config_dict,
|
||||
}
|
||||
|
||||
episodes = metadata_dict[1]["av_reward_per_episode"].keys()
|
||||
|
||||
for episode in episodes:
|
||||
combined_av_reward = sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / n
|
||||
averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward
|
||||
|
||||
return averaged_data
|
||||
|
||||
|
||||
def _get_df_from_episode_av_reward_dict(data: Dict) -> pl.DataFrame:
|
||||
data: Dict = {"episode": data.keys(), "av_reward": data.values()}
|
||||
|
||||
return (
|
||||
pl.from_dict(data)
|
||||
.with_columns(rolling_mean=pl.col("av_reward").rolling_mean(window_size=25))
|
||||
.rename({"rolling_mean": "rolling_av_reward"})
|
||||
)
|
||||
|
||||
|
||||
def _plot_benchmark_metadata(
|
||||
benchmark_metadata_dict: Dict,
|
||||
title: Optional[str] = None,
|
||||
subtitle: Optional[str] = None,
|
||||
) -> Figure:
|
||||
if title:
|
||||
if subtitle:
|
||||
title = f"{title} <br>{subtitle}</sup>"
|
||||
else:
|
||||
if subtitle:
|
||||
title = subtitle
|
||||
|
||||
config = get_plotly_config()
|
||||
layout = go.Layout(
|
||||
autosize=config["size"]["auto_size"],
|
||||
width=config["size"]["width"],
|
||||
height=config["size"]["height"],
|
||||
)
|
||||
# Create the line graph with a colored line
|
||||
fig = go.Figure(layout=layout)
|
||||
fig.update_layout(template=config["template"])
|
||||
|
||||
for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items():
|
||||
df = _get_df_from_episode_av_reward_dict(av_reward_dict)
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=df["episode"],
|
||||
y=df["av_reward"],
|
||||
mode="lines",
|
||||
name=f"Session {session}",
|
||||
opacity=0.25,
|
||||
line={"color": "#a6a6a6"},
|
||||
)
|
||||
)
|
||||
|
||||
df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"])
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"}
|
||||
)
|
||||
)
|
||||
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=df["episode"],
|
||||
y=df["rolling_av_reward"],
|
||||
mode="lines",
|
||||
name="Rolling Av (Combined Session Av)",
|
||||
line={"color": "#4CBB17"},
|
||||
)
|
||||
)
|
||||
|
||||
# Set the layout of the graph
|
||||
fig.update_layout(
|
||||
xaxis={
|
||||
"title": "Episode",
|
||||
"type": "linear",
|
||||
},
|
||||
yaxis={"title": "Average Reward"},
|
||||
title=title,
|
||||
)
|
||||
|
||||
return fig
|
||||
|
||||
|
||||
def _plot_all_benchmarks_combined_session_av() -> Figure:
|
||||
def _get_benchmark_primaite_environment() -> BenchmarkPrimaiteGymEnv:
|
||||
"""
|
||||
Plot the Benchmark results for each released version of PrimAITE.
|
||||
Create an instance of the BenchmarkPrimaiteGymEnv.
|
||||
|
||||
Does this by iterating over the ``benchmark/results`` directory and
|
||||
extracting the benchmark metadata json for each version that has been
|
||||
benchmarked. The combined_av_reward_per_episode is extracted from each,
|
||||
converted into a polars dataframe, and plotted as a scatter line in plotly.
|
||||
This environment will be used to train the agents on.
|
||||
"""
|
||||
title = "PrimAITE Versions Learning Benchmark"
|
||||
subtitle = "Rolling Av (Combined Session Av)"
|
||||
if title:
|
||||
if subtitle:
|
||||
title = f"{title} <br>{subtitle}</sup>"
|
||||
else:
|
||||
if subtitle:
|
||||
title = subtitle
|
||||
config = get_plotly_config()
|
||||
layout = go.Layout(
|
||||
autosize=config["size"]["auto_size"],
|
||||
width=config["size"]["width"],
|
||||
height=config["size"]["height"],
|
||||
)
|
||||
# Create the line graph with a colored line
|
||||
fig = go.Figure(layout=layout)
|
||||
fig.update_layout(template=config["template"])
|
||||
|
||||
for dir in _RESULTS_ROOT.iterdir():
|
||||
if dir.is_dir():
|
||||
metadata_file = dir / f"{dir.name}_benchmark_metadata.json"
|
||||
with open(metadata_file, "r") as file:
|
||||
metadata_dict = json.load(file)
|
||||
df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"])
|
||||
|
||||
fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name))
|
||||
|
||||
# Set the layout of the graph
|
||||
fig.update_layout(
|
||||
xaxis={
|
||||
"title": "Episode",
|
||||
"type": "linear",
|
||||
},
|
||||
yaxis={"title": "Average Reward"},
|
||||
title=title,
|
||||
)
|
||||
fig["data"][0]["showlegend"] = True
|
||||
|
||||
return fig
|
||||
return BenchmarkPrimaiteGymEnv(env_config=data_manipulation_config_path())
|
||||
|
||||
|
||||
def run() -> None:
|
||||
def _prepare_session_directory():
|
||||
"""Prepare the session directory so that it is easier to clean up after the benchmarking is done."""
|
||||
# override session path
|
||||
session_path = _BENCHMARK_ROOT / "sessions"
|
||||
|
||||
if session_path.is_dir():
|
||||
shutil.rmtree(session_path)
|
||||
|
||||
primaite.PRIMAITE_PATHS.user_sessions_path = session_path
|
||||
primaite.PRIMAITE_PATHS.user_sessions_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
|
||||
def run(number_of_sessions: int = 1, num_episodes: int = 3, batch_size: int = 128) -> None: # 10 # 1000 # 256
|
||||
"""Run the PrimAITE benchmark."""
|
||||
start_datetime = datetime.now()
|
||||
av_reward_per_episode_dicts = {}
|
||||
for i in range(1, 11):
|
||||
benchmark_start_time = datetime.now()
|
||||
|
||||
session_metadata_dict = {}
|
||||
|
||||
_prepare_session_directory()
|
||||
|
||||
# run training
|
||||
for i in range(1, number_of_sessions + 1):
|
||||
print(f"Starting Benchmark Session: {i}")
|
||||
with _get_benchmark_primaite_session() as session:
|
||||
session.learn()
|
||||
av_reward_per_episode_dicts[i] = session.learn_metadata_dict()
|
||||
|
||||
benchmark_metadata = _build_benchmark_results_dict(
|
||||
start_datetime=start_datetime, metadata_dict=av_reward_per_episode_dicts
|
||||
with _get_benchmark_primaite_environment() as gym_env:
|
||||
session = BenchmarkSession(gym_env=gym_env, num_episodes=num_episodes, batch_size=batch_size)
|
||||
session.train()
|
||||
session_metadata_dict[i] = session.session_metadata
|
||||
|
||||
# generate report
|
||||
build_benchmark_latex_report(
|
||||
benchmark_start_time=benchmark_start_time,
|
||||
session_metadata=session_metadata_dict,
|
||||
config_path=data_manipulation_config_path(),
|
||||
results_root_path=_RESULTS_ROOT,
|
||||
)
|
||||
v_str = f"v{primaite.__version__}"
|
||||
|
||||
version_result_dir = _RESULTS_ROOT / v_str
|
||||
if version_result_dir.exists():
|
||||
shutil.rmtree(version_result_dir)
|
||||
version_result_dir.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file:
|
||||
json.dump(benchmark_metadata, file, indent=4)
|
||||
title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark"
|
||||
fig = _plot_benchmark_metadata(benchmark_metadata, title=title)
|
||||
this_version_plot_path = version_result_dir / f"{title}.png"
|
||||
fig.write_image(this_version_plot_path)
|
||||
|
||||
fig = _plot_all_benchmarks_combined_session_av()
|
||||
|
||||
all_version_plot_path = _RESULTS_ROOT / "PrimAITE Versions Learning Benchmark.png"
|
||||
fig.write_image(all_version_plot_path)
|
||||
|
||||
_build_benchmark_latex_report(benchmark_metadata, this_version_plot_path, all_version_plot_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
122
benchmark/utils/benchmark.py
Normal file
122
benchmark/utils/benchmark.py
Normal file
@@ -0,0 +1,122 @@
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from gymnasium.core import ObsType
|
||||
|
||||
from primaite.session.environment import PrimaiteGymEnv
|
||||
|
||||
|
||||
class BenchmarkPrimaiteGymEnv(PrimaiteGymEnv):
|
||||
"""
|
||||
Class that extends the PrimaiteGymEnv.
|
||||
|
||||
The reset method is extended so that the average rewards per episode are recorded.
|
||||
"""
|
||||
|
||||
total_time_steps: int = 0
|
||||
|
||||
def reset(self, seed: Optional[int] = None) -> Tuple[ObsType, Dict[str, Any]]:
|
||||
"""Overrides the PrimAITEGymEnv reset so that the total timesteps is saved."""
|
||||
self.total_time_steps += self.game.step_counter
|
||||
|
||||
return super().reset(seed=seed)
|
||||
|
||||
|
||||
#####################################
|
||||
# IGNORE BELOW FOR NOW
|
||||
#####################################
|
||||
|
||||
|
||||
class BenchMarkOSInfo:
|
||||
"""Operating System Information about the machine that run the benchmark."""
|
||||
|
||||
operating_system: str
|
||||
"""The operating system the benchmark was run on."""
|
||||
|
||||
operating_system_version: str
|
||||
"""The operating system version the benchmark was run on."""
|
||||
|
||||
machine: str
|
||||
"""The type of machine running the benchmark."""
|
||||
|
||||
processor: str
|
||||
"""The processor used to run the benchmark."""
|
||||
|
||||
|
||||
class BenchMarkCPUInfo:
|
||||
"""CPU Information of the machine that ran the benchmark."""
|
||||
|
||||
physical_cores: int
|
||||
"""The number of CPU cores the machine that ran the benchmark had."""
|
||||
|
||||
total_cores: int
|
||||
"""The number of total cores the machine that run the benchmark had."""
|
||||
|
||||
max_frequency: int
|
||||
"""The CPU's maximum clock speed."""
|
||||
|
||||
|
||||
class BenchMarkMemoryInfo:
|
||||
"""The Memory Information of the machine that ran the benchmark."""
|
||||
|
||||
total: str
|
||||
"""The total amount of memory."""
|
||||
|
||||
swap_total: str
|
||||
"""Virtual memory."""
|
||||
|
||||
|
||||
class BenchMarkGPUInfo:
|
||||
"""The GPU Information of the machine that ran the benchmark."""
|
||||
|
||||
name: str
|
||||
"""GPU name."""
|
||||
|
||||
total_memory: str
|
||||
"""GPU memory."""
|
||||
|
||||
|
||||
class BenchMarkSystemInfo:
|
||||
"""Overall system information of the machine that ran the benchmark."""
|
||||
|
||||
system: BenchMarkOSInfo
|
||||
cpu: BenchMarkCPUInfo
|
||||
memory: BenchMarkMemoryInfo
|
||||
gpu: List[BenchMarkMemoryInfo]
|
||||
|
||||
|
||||
class BenchMarkResult:
|
||||
"""Class containing the relevant benchmark results."""
|
||||
|
||||
benchmark_start_time: datetime
|
||||
"""Start time of the benchmark run."""
|
||||
|
||||
benchmark_end_time: datetime
|
||||
"""End time of the benchmark run."""
|
||||
|
||||
primaite_version: str
|
||||
"""The version of PrimAITE being benchmarked."""
|
||||
|
||||
system_info: BenchMarkSystemInfo
|
||||
"""System information of the machine that ran the benchmark."""
|
||||
|
||||
total_sessions: int
|
||||
"""The number of sessions that the benchmark ran."""
|
||||
|
||||
total_episodes: int
|
||||
"""The number of episodes over all the sessions that the benchmark ran."""
|
||||
|
||||
total_timesteps: int
|
||||
"""The number of timesteps over all the sessions that the benchmark ran."""
|
||||
|
||||
average_seconds_per_session: float
|
||||
"""The average time per session."""
|
||||
|
||||
average_seconds_per_step: float
|
||||
"""The average time per step."""
|
||||
|
||||
average_seconds_per_100_steps_and_10_nodes: float
|
||||
"""The average time per 100 steps on a 10 node network."""
|
||||
|
||||
combined_average_reward_per_episode: Dict
|
||||
"""tbd."""
|
||||
304
benchmark/utils/report.py
Normal file
304
benchmark/utils/report.py
Normal file
@@ -0,0 +1,304 @@
|
||||
# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional
|
||||
|
||||
import plotly.graph_objects as go
|
||||
import polars as pl
|
||||
import yaml
|
||||
from plotly.graph_objs import Figure
|
||||
from pylatex import Command, Document
|
||||
from pylatex import Figure as LatexFigure
|
||||
from pylatex import Section, Subsection, Tabular
|
||||
from pylatex.utils import bold
|
||||
|
||||
import primaite
|
||||
from benchmark.utils.utils import _get_system_info
|
||||
|
||||
PLOT_CONFIG = {
|
||||
"size": {"auto_size": False, "width": 1500, "height": 900},
|
||||
"template": "plotly_white",
|
||||
"range_slider": False,
|
||||
}
|
||||
|
||||
|
||||
def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict, config: Dict) -> dict:
|
||||
n = len(metadata_dict)
|
||||
|
||||
averaged_data = {
|
||||
"start_timestamp": start_datetime.isoformat(),
|
||||
"end_datetime": datetime.now().isoformat(),
|
||||
"primaite_version": primaite.__version__,
|
||||
"system_info": _get_system_info(),
|
||||
"total_sessions": n,
|
||||
"total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()),
|
||||
"total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()),
|
||||
"av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / n,
|
||||
"av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / n,
|
||||
"av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) / n,
|
||||
"combined_av_reward_per_episode": {},
|
||||
"session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()},
|
||||
"config": config,
|
||||
}
|
||||
|
||||
episode_averages = [episode["av_reward_per_episode"] for episode in metadata_dict.values()]
|
||||
|
||||
episode = 0
|
||||
for episode_average in episode_averages:
|
||||
episode += 1
|
||||
averaged_data["combined_av_reward_per_episode"][str(episode)] = episode_average
|
||||
|
||||
return averaged_data
|
||||
|
||||
|
||||
def _get_df_from_episode_av_reward_dict(data: Dict) -> pl.DataFrame:
|
||||
data: Dict = {"episode": data.keys(), "av_reward": data.values()}
|
||||
|
||||
return (
|
||||
pl.from_dict(data)
|
||||
.with_columns(rolling_mean=pl.col("av_reward").rolling_mean(window_size=25))
|
||||
.rename({"rolling_mean": "rolling_av_reward"})
|
||||
)
|
||||
|
||||
|
||||
def _plot_benchmark_metadata(
|
||||
benchmark_metadata_dict: Dict,
|
||||
title: Optional[str] = None,
|
||||
subtitle: Optional[str] = None,
|
||||
) -> Figure:
|
||||
if title:
|
||||
if subtitle:
|
||||
title = f"{title} <br>{subtitle}</sup>"
|
||||
else:
|
||||
if subtitle:
|
||||
title = subtitle
|
||||
|
||||
layout = go.Layout(
|
||||
autosize=PLOT_CONFIG["size"]["auto_size"],
|
||||
width=PLOT_CONFIG["size"]["width"],
|
||||
height=PLOT_CONFIG["size"]["height"],
|
||||
)
|
||||
# Create the line graph with a colored line
|
||||
fig = go.Figure(layout=layout)
|
||||
fig.update_layout(template=PLOT_CONFIG["template"])
|
||||
|
||||
for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items():
|
||||
df = _get_df_from_episode_av_reward_dict(av_reward_dict)
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=df["episode"],
|
||||
y=df["av_reward"],
|
||||
mode="lines",
|
||||
name=f"Session {session}",
|
||||
opacity=0.25,
|
||||
line={"color": "#a6a6a6"},
|
||||
)
|
||||
)
|
||||
|
||||
df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"])
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"}
|
||||
)
|
||||
)
|
||||
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=df["episode"],
|
||||
y=df["rolling_av_reward"],
|
||||
mode="lines",
|
||||
name="Rolling Av (Combined Session Av)",
|
||||
line={"color": "#4CBB17"},
|
||||
)
|
||||
)
|
||||
|
||||
# Set the layout of the graph
|
||||
fig.update_layout(
|
||||
xaxis={
|
||||
"title": "Episode",
|
||||
"type": "linear",
|
||||
},
|
||||
yaxis={"title": "Average Reward"},
|
||||
title=title,
|
||||
)
|
||||
|
||||
return fig
|
||||
|
||||
|
||||
def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
|
||||
"""
|
||||
Plot the Benchmark results for each released version of PrimAITE.
|
||||
|
||||
Does this by iterating over the ``benchmark/results`` directory and
|
||||
extracting the benchmark metadata json for each version that has been
|
||||
benchmarked. The combined_av_reward_per_episode is extracted from each,
|
||||
converted into a polars dataframe, and plotted as a scatter line in plotly.
|
||||
"""
|
||||
title = "PrimAITE Versions Learning Benchmark"
|
||||
subtitle = "Rolling Av (Combined Session Av)"
|
||||
if title:
|
||||
if subtitle:
|
||||
title = f"{title} <br>{subtitle}</sup>"
|
||||
else:
|
||||
if subtitle:
|
||||
title = subtitle
|
||||
layout = go.Layout(
|
||||
autosize=PLOT_CONFIG["size"]["auto_size"],
|
||||
width=PLOT_CONFIG["size"]["width"],
|
||||
height=PLOT_CONFIG["size"]["height"],
|
||||
)
|
||||
# Create the line graph with a colored line
|
||||
fig = go.Figure(layout=layout)
|
||||
fig.update_layout(template=PLOT_CONFIG["template"])
|
||||
|
||||
for dir in results_directory.iterdir():
|
||||
if dir.is_dir():
|
||||
metadata_file = dir / f"{dir.name}_benchmark_metadata.json"
|
||||
with open(metadata_file, "r") as file:
|
||||
metadata_dict = json.load(file)
|
||||
df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"])
|
||||
|
||||
fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name))
|
||||
|
||||
# Set the layout of the graph
|
||||
fig.update_layout(
|
||||
xaxis={
|
||||
"title": "Episode",
|
||||
"type": "linear",
|
||||
},
|
||||
yaxis={"title": "Average Reward"},
|
||||
title=title,
|
||||
)
|
||||
fig["data"][0]["showlegend"] = True
|
||||
|
||||
return fig
|
||||
|
||||
|
||||
def build_benchmark_latex_report(
|
||||
benchmark_start_time: datetime, session_metadata: Dict, config_path: Path, results_root_path: Path
|
||||
) -> None:
|
||||
"""Generates a latex report of the benchmark run."""
|
||||
# generate report folder
|
||||
v_str = f"v{primaite.__version__}"
|
||||
|
||||
version_result_dir = results_root_path / v_str
|
||||
if version_result_dir.exists():
|
||||
shutil.rmtree(version_result_dir)
|
||||
version_result_dir.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# load the config file as dict
|
||||
with open(config_path, "r") as f:
|
||||
cfg_data = yaml.safe_load(f)
|
||||
|
||||
# generate the benchmark metadata dict
|
||||
benchmark_metadata_dict = _build_benchmark_results_dict(
|
||||
start_datetime=benchmark_start_time, metadata_dict=session_metadata, config=cfg_data
|
||||
)
|
||||
|
||||
with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file:
|
||||
json.dump(benchmark_metadata_dict, file, indent=4)
|
||||
title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark"
|
||||
fig = _plot_benchmark_metadata(benchmark_metadata_dict, title=title)
|
||||
this_version_plot_path = version_result_dir / f"{title}.png"
|
||||
fig.write_image(this_version_plot_path)
|
||||
|
||||
fig = _plot_all_benchmarks_combined_session_av()
|
||||
|
||||
all_version_plot_path = results_root_path / "PrimAITE Versions Learning Benchmark.png"
|
||||
fig.write_image(all_version_plot_path)
|
||||
|
||||
geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"}
|
||||
data = benchmark_metadata_dict
|
||||
primaite_version = data["primaite_version"]
|
||||
|
||||
# Create a new document
|
||||
doc = Document("report", geometry_options=geometry_options)
|
||||
# Title
|
||||
doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark"))
|
||||
doc.preamble.append(Command("author", "PrimAITE Dev Team"))
|
||||
doc.preamble.append(Command("date", datetime.now().date()))
|
||||
doc.append(Command("maketitle"))
|
||||
|
||||
sessions = data["total_sessions"]
|
||||
episodes = data["training_config"]["num_train_episodes"]
|
||||
steps = data["training_config"]["num_train_steps"]
|
||||
|
||||
# Body
|
||||
with doc.create(Section("Introduction")):
|
||||
doc.append(
|
||||
f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics "
|
||||
f"were captured to be referenced during system-level testing and user acceptance testing (UAT)."
|
||||
)
|
||||
doc.append(
|
||||
f"\nThe benchmarking process consists of running {sessions} training session using the same "
|
||||
f"training and lay down config files. Each session trains an agent for {episodes} episodes, "
|
||||
f"with each episode consisting of {steps} steps."
|
||||
)
|
||||
doc.append(
|
||||
f"\nThe mean reward per episode from each session is captured. This is then used to calculate a "
|
||||
f"combined average reward per episode from the {sessions} individual sessions for smoothing. "
|
||||
f"Finally, a 25-widow rolling average of the combined average reward per session is calculated for "
|
||||
f"further smoothing."
|
||||
)
|
||||
|
||||
with doc.create(Section("System Information")):
|
||||
with doc.create(Subsection("Python")):
|
||||
with doc.create(Tabular("|l|l|")) as table:
|
||||
table.add_hline()
|
||||
table.add_row((bold("Version"), sys.version))
|
||||
table.add_hline()
|
||||
for section, section_data in data["system_info"].items():
|
||||
if section_data:
|
||||
with doc.create(Subsection(section)):
|
||||
if isinstance(section_data, dict):
|
||||
with doc.create(Tabular("|l|l|")) as table:
|
||||
table.add_hline()
|
||||
for key, value in section_data.items():
|
||||
table.add_row((bold(key), value))
|
||||
table.add_hline()
|
||||
elif isinstance(section_data, list):
|
||||
headers = section_data[0].keys()
|
||||
tabs_str = "|".join(["l" for _ in range(len(headers))])
|
||||
tabs_str = f"|{tabs_str}|"
|
||||
with doc.create(Tabular(tabs_str)) as table:
|
||||
table.add_hline()
|
||||
table.add_row([bold(h) for h in headers])
|
||||
table.add_hline()
|
||||
for item in section_data:
|
||||
table.add_row(item.values())
|
||||
table.add_hline()
|
||||
|
||||
headers_map = {
|
||||
"total_sessions": "Total Sessions",
|
||||
"total_episodes": "Total Episodes",
|
||||
"total_time_steps": "Total Steps",
|
||||
"av_s_per_session": "Av Session Duration (s)",
|
||||
"av_s_per_step": "Av Step Duration (s)",
|
||||
"av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
|
||||
}
|
||||
with doc.create(Section("Stats")):
|
||||
with doc.create(Subsection("Benchmark Results")):
|
||||
with doc.create(Tabular("|l|l|")) as table:
|
||||
table.add_hline()
|
||||
for section, header in headers_map.items():
|
||||
if section.startswith("av_"):
|
||||
table.add_row((bold(header), f"{data[section]:.4f}"))
|
||||
else:
|
||||
table.add_row((bold(header), data[section]))
|
||||
table.add_hline()
|
||||
|
||||
with doc.create(Section("Graphs")):
|
||||
with doc.create(Subsection(f"PrimAITE {primaite_version} Learning Benchmark Plot")):
|
||||
with doc.create(LatexFigure(position="h!")) as pic:
|
||||
pic.add_image(str(this_version_plot_path))
|
||||
pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot")
|
||||
|
||||
with doc.create(Subsection("PrimAITE All Versions Learning Benchmark Plot")):
|
||||
with doc.create(LatexFigure(position="h!")) as pic:
|
||||
pic.add_image(str(all_version_plot_path))
|
||||
pic.add_caption("PrimAITE All Versions Learning Benchmark Plot")
|
||||
|
||||
doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True)
|
||||
47
benchmark/utils/utils.py
Normal file
47
benchmark/utils/utils.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
|
||||
import platform
|
||||
from typing import Dict
|
||||
|
||||
import psutil
|
||||
from GPUtil import GPUtil
|
||||
|
||||
|
||||
def get_size(size_bytes: int) -> str:
|
||||
"""
|
||||
Scale bytes to its proper format.
|
||||
|
||||
e.g:
|
||||
1253656 => '1.20MB'
|
||||
1253656678 => '1.17GB'
|
||||
|
||||
:
|
||||
"""
|
||||
factor = 1024
|
||||
for unit in ["", "K", "M", "G", "T", "P"]:
|
||||
if size_bytes < factor:
|
||||
return f"{size_bytes:.2f}{unit}B"
|
||||
size_bytes /= factor
|
||||
|
||||
|
||||
def _get_system_info() -> Dict:
|
||||
"""Builds and returns a dict containing system info."""
|
||||
uname = platform.uname()
|
||||
cpu_freq = psutil.cpu_freq()
|
||||
virtual_mem = psutil.virtual_memory()
|
||||
swap_mem = psutil.swap_memory()
|
||||
gpus = GPUtil.getGPUs()
|
||||
return {
|
||||
"System": {
|
||||
"OS": uname.system,
|
||||
"OS Version": uname.version,
|
||||
"Machine": uname.machine,
|
||||
"Processor": uname.processor,
|
||||
},
|
||||
"CPU": {
|
||||
"Physical Cores": psutil.cpu_count(logical=False),
|
||||
"Total Cores": psutil.cpu_count(logical=True),
|
||||
"Max Frequency": f"{cpu_freq.max:.2f}Mhz",
|
||||
},
|
||||
"Memory": {"Total": get_size(virtual_mem.total), "Swap Total": get_size(swap_mem.total)},
|
||||
"GPU": [{"Name": gpu.name, "Total Memory": f"{gpu.memoryTotal}MB"} for gpu in gpus],
|
||||
}
|
||||
Reference in New Issue
Block a user