diff --git a/.gitignore b/.gitignore index c3d54ada..2ba8d4a7 100644 --- a/.gitignore +++ b/.gitignore @@ -164,3 +164,7 @@ src/primaite/notebooks/scratch.py sandbox.py sandbox/ sandbox.ipynb + +# benchmarking +**/benchmark/sessions/ +**/benchmark/output/ diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py new file mode 100644 index 00000000..5212b5d2 --- /dev/null +++ b/benchmark/benchmark.py @@ -0,0 +1,21 @@ +from typing import Any, Dict, Optional, Tuple + +from gymnasium.core import ObsType + +from primaite.session.environment import PrimaiteGymEnv + + +class BenchmarkPrimaiteGymEnv(PrimaiteGymEnv): + """ + Class that extends the PrimaiteGymEnv. + + The reset method is extended so that the total number of time steps across all episodes is recorded. + """ + + total_time_steps: int = 0 + + def reset(self, seed: Optional[int] = None) -> Tuple[ObsType, Dict[str, Any]]: + """Overrides the PrimAITEGymEnv reset so that the total timesteps is saved.""" + self.total_time_steps += self.game.step_counter + + return super().reset(seed=seed) diff --git a/benchmark/primaite_benchmark.py b/benchmark/primaite_benchmark.py index 226bb71e..3263867e 100644 --- a/benchmark/primaite_benchmark.py +++ b/benchmark/primaite_benchmark.py @@ -1,211 +1,93 @@ -# flake8: noqa -raise DeprecationWarning( - "Benchmarking depends on deprecated functionality and it has not been updated to primaite v3 yet." 
-) # © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK import json -import platform import shutil -import sys from datetime import datetime from pathlib import Path -from typing import Any, Dict, Final, Optional, Tuple, Union -from unittest.mock import patch +from typing import Any, Dict, Final, Tuple -import GPUtil -import plotly.graph_objects as go -import polars as pl -import psutil -import yaml -from plotly.graph_objs import Figure -from pylatex import Command, Document -from pylatex import Figure as LatexFigure -from pylatex import Section, Subsection, Tabular -from pylatex.utils import bold +from report import build_benchmark_latex_report +from stable_baselines3 import PPO import primaite -from primaite.config.lay_down_config import data_manipulation_config_path -from primaite.data_viz.session_plots import get_plotly_config -from primaite.environment.primaite_env import Primaite -from primaite.primaite_session import PrimaiteSession +from benchmark import BenchmarkPrimaiteGymEnv +from primaite.config.load import data_manipulation_config_path _LOGGER = primaite.getLogger(__name__) +_MAJOR_V = primaite.__version__.split(".")[0] + _BENCHMARK_ROOT = Path(__file__).parent -_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results" -_RESULTS_ROOT.mkdir(exist_ok=True, parents=True) +_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results" / f"v{_MAJOR_V}" +_VERSION_ROOT: Final[Path] = _RESULTS_ROOT / f"v{primaite.__version__}" +_SESSION_METADATA_ROOT: Final[Path] = _VERSION_ROOT / "session_metadata" -_OUTPUT_ROOT: Final[Path] = _BENCHMARK_ROOT / "output" -# Clear and recreate the output directory -if _OUTPUT_ROOT.exists(): - shutil.rmtree(_OUTPUT_ROOT) -_OUTPUT_ROOT.mkdir() - -_TRAINING_CONFIG_PATH = _BENCHMARK_ROOT / "config" / "benchmark_training_config.yaml" -_LAY_DOWN_CONFIG_PATH = data_manipulation_config_path() +_SESSION_METADATA_ROOT.mkdir(parents=True, exist_ok=True) -def get_size(size_bytes: int) -> str: - """ - Scale bytes to its 
proper format. +class BenchmarkSession: + """Benchmark Session class.""" - e.g: - 1253656 => '1.20MB' - 1253656678 => '1.17GB' + gym_env: BenchmarkPrimaiteGymEnv + """Gym environment used by the session to train.""" - : - """ - factor = 1024 - for unit in ["", "K", "M", "G", "T", "P"]: - if size_bytes < factor: - return f"{size_bytes:.2f}{unit}B" - size_bytes /= factor + num_episodes: int + """Number of episodes to run the training session.""" + episode_len: int + """The number of steps per episode.""" -def _get_system_info() -> Dict: - """Builds and returns a dict containing system info.""" - uname = platform.uname() - cpu_freq = psutil.cpu_freq() - virtual_mem = psutil.virtual_memory() - swap_mem = psutil.swap_memory() - gpus = GPUtil.getGPUs() - return { - "System": { - "OS": uname.system, - "OS Version": uname.version, - "Machine": uname.machine, - "Processor": uname.processor, - }, - "CPU": { - "Physical Cores": psutil.cpu_count(logical=False), - "Total Cores": psutil.cpu_count(logical=True), - "Max Frequency": f"{cpu_freq.max:.2f}Mhz", - }, - "Memory": {"Total": get_size(virtual_mem.total), "Swap Total": get_size(swap_mem.total)}, - "GPU": [{"Name": gpu.name, "Total Memory": f"{gpu.memoryTotal}MB"} for gpu in gpus], - } + total_steps: int + """Number of steps to run the training session.""" + batch_size: int + """Batch size passed to the PPO model.""" -def _build_benchmark_latex_report( - benchmark_metadata_dict: Dict, this_version_plot_path: Path, all_version_plot_path: Path -) -> None: - geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"} - data = benchmark_metadata_dict - primaite_version = data["primaite_version"] + learning_rate: float + """Learning rate for the model.""" - # Create a new document - doc = Document("report", geometry_options=geometry_options) - # Title - doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark")) - doc.preamble.append(Command("author", "PrimAITE 
Dev Team")) - doc.preamble.append(Command("date", datetime.now().date())) - doc.append(Command("maketitle")) + start_time: datetime + """Start time for the session.""" - sessions = data["total_sessions"] - episodes = data["training_config"]["num_train_episodes"] - steps = data["training_config"]["num_train_steps"] - - # Body - with doc.create(Section("Introduction")): - doc.append( - f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics " - f"were captured to be referenced during system-level testing and user acceptance testing (UAT)." - ) - doc.append( - f"\nThe benchmarking process consists of running {sessions} training session using the same " - f"training and lay down config files. Each session trains an agent for {episodes} episodes, " - f"with each episode consisting of {steps} steps." - ) - doc.append( - f"\nThe mean reward per episode from each session is captured. This is then used to calculate a " - f"combined average reward per episode from the {sessions} individual sessions for smoothing. " - f"Finally, a 25-widow rolling average of the combined average reward per session is calculated for " - f"further smoothing." 
- ) - - with doc.create(Section("System Information")): - with doc.create(Subsection("Python")): - with doc.create(Tabular("|l|l|")) as table: - table.add_hline() - table.add_row((bold("Version"), sys.version)) - table.add_hline() - for section, section_data in data["system_info"].items(): - if section_data: - with doc.create(Subsection(section)): - if isinstance(section_data, dict): - with doc.create(Tabular("|l|l|")) as table: - table.add_hline() - for key, value in section_data.items(): - table.add_row((bold(key), value)) - table.add_hline() - elif isinstance(section_data, list): - headers = section_data[0].keys() - tabs_str = "|".join(["l" for _ in range(len(headers))]) - tabs_str = f"|{tabs_str}|" - with doc.create(Tabular(tabs_str)) as table: - table.add_hline() - table.add_row([bold(h) for h in headers]) - table.add_hline() - for item in section_data: - table.add_row(item.values()) - table.add_hline() - - headers_map = { - "total_sessions": "Total Sessions", - "total_episodes": "Total Episodes", - "total_time_steps": "Total Steps", - "av_s_per_session": "Av Session Duration (s)", - "av_s_per_step": "Av Step Duration (s)", - "av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)", - } - with doc.create(Section("Stats")): - with doc.create(Subsection("Benchmark Results")): - with doc.create(Tabular("|l|l|")) as table: - table.add_hline() - for section, header in headers_map.items(): - if section.startswith("av_"): - table.add_row((bold(header), f"{data[section]:.4f}")) - else: - table.add_row((bold(header), data[section])) - table.add_hline() - - with doc.create(Section("Graphs")): - with doc.create(Subsection(f"PrimAITE {primaite_version} Learning Benchmark Plot")): - with doc.create(LatexFigure(position="h!")) as pic: - pic.add_image(str(this_version_plot_path)) - pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot") - - with doc.create(Subsection("PrimAITE All Versions Learning Benchmark Plot")): - with 
doc.create(LatexFigure(position="h!")) as pic: - pic.add_image(str(all_version_plot_path)) - pic.add_caption("PrimAITE All Versions Learning Benchmark Plot") - - doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True) - - -class BenchmarkPrimaiteSession(PrimaiteSession): - """A benchmarking primaite session.""" + end_time: datetime + """End time for the session.""" def __init__( self, - training_config_path: Union[str, Path], - lay_down_config_path: Union[str, Path], - ) -> None: - super().__init__(training_config_path, lay_down_config_path) - self.setup() + gym_env: BenchmarkPrimaiteGymEnv, + episode_len: int, + num_episodes: int, + n_steps: int, + batch_size: int, + learning_rate: float, + ): + """Initialise the BenchmarkSession.""" + self.gym_env = gym_env + self.episode_len = episode_len + self.n_steps = n_steps + self.num_episodes = num_episodes + self.total_steps = self.num_episodes * self.episode_len + self.batch_size = batch_size + self.learning_rate = learning_rate - @property - def env(self) -> Primaite: - """Direct access to the env for ease of testing.""" - return self._agent_session._env # noqa + def train(self): + """Run the training session.""" + # start timer for session + self.start_time = datetime.now() + model = PPO( + policy="MlpPolicy", + env=self.gym_env, + learning_rate=self.learning_rate, + n_steps=self.n_steps, + batch_size=self.batch_size, + verbose=0, + tensorboard_log="./PPO_UC2/", + ) + model.learn(total_timesteps=self.total_steps) - def __enter__(self) -> "BenchmarkPrimaiteSession": - return self + # end timer for session + self.end_time = datetime.now() - # TODO: typehints uncertain - def __exit__(self, type: Any, value: Any, tb: Any) -> None: - shutil.rmtree(self.session_path) - _LOGGER.debug(f"Deleted benchmark session directory: {self.session_path}") + self.session_metadata = self.generate_learn_metadata_dict() def _learn_benchmark_durations(self) -> Tuple[float, float, float]: """ @@ -219,235 +101,99 @@ 
class BenchmarkPrimaiteSession(PrimaiteSession): :return: The learning benchmark durations as a Tuple of three floats: Tuple[total_s, s_per_step, s_per_100_steps_10_nodes]. """ - data = self.metadata_file_as_dict() - start_dt = datetime.fromisoformat(data["start_datetime"]) - end_dt = datetime.fromisoformat(data["end_datetime"]) - delta = end_dt - start_dt + delta = self.end_time - self.start_time total_s = delta.total_seconds() - total_steps = data["learning"]["total_time_steps"] + total_steps = self.batch_size * self.num_episodes s_per_step = total_s / total_steps - num_nodes = self.env.num_nodes + num_nodes = len(self.gym_env.game.simulation.network.nodes) num_intervals = total_steps / 100 av_interval_time = total_s / num_intervals s_per_100_steps_10_nodes = av_interval_time / (num_nodes / 10) return total_s, s_per_step, s_per_100_steps_10_nodes - def learn_metadata_dict(self) -> Dict[str, Any]: + def generate_learn_metadata_dict(self) -> Dict[str, Any]: """Metadata specific to the learning session.""" total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations() + self.gym_env.average_reward_per_episode.pop(0) # remove episode 0 return { - "total_episodes": self.env.actual_episode_count, - "total_time_steps": self.env.total_step_count, + "total_episodes": self.gym_env.episode_counter, + "total_time_steps": self.gym_env.total_time_steps, "total_s": total_s, "s_per_step": s_per_step, "s_per_100_steps_10_nodes": s_per_100_steps_10_nodes, - "av_reward_per_episode": self.learn_av_reward_per_episode_dict(), + "av_reward_per_episode": self.gym_env.average_reward_per_episode, } -def _get_benchmark_session_path(session_timestamp: datetime) -> Path: - return _OUTPUT_ROOT / session_timestamp.strftime("%Y-%m-%d_%H-%M-%S") - - -def _get_benchmark_primaite_session() -> BenchmarkPrimaiteSession: - with patch("primaite.agents.agent_abc.get_session_path", _get_benchmark_session_path) as mck: - mck.session_timestamp = datetime.now() - return 
BenchmarkPrimaiteSession(_TRAINING_CONFIG_PATH, _LAY_DOWN_CONFIG_PATH) - - -def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict) -> dict: - n = len(metadata_dict) - with open(_TRAINING_CONFIG_PATH, "r") as file: - training_config_dict = yaml.safe_load(file) - with open(_LAY_DOWN_CONFIG_PATH, "r") as file: - lay_down_config_dict = yaml.safe_load(file) - averaged_data = { - "start_timestamp": start_datetime.isoformat(), - "end_datetime": datetime.now().isoformat(), - "primaite_version": primaite.__version__, - "system_info": _get_system_info(), - "total_sessions": n, - "total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()), - "total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()), - "av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / n, - "av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / n, - "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) / n, - "combined_av_reward_per_episode": {}, - "session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()}, - "training_config": training_config_dict, - "lay_down_config": lay_down_config_dict, - } - - episodes = metadata_dict[1]["av_reward_per_episode"].keys() - - for episode in episodes: - combined_av_reward = sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / n - averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward - - return averaged_data - - -def _get_df_from_episode_av_reward_dict(data: Dict) -> pl.DataFrame: - data: Dict = {"episode": data.keys(), "av_reward": data.values()} - - return ( - pl.from_dict(data) - .with_columns(rolling_mean=pl.col("av_reward").rolling_mean(window_size=25)) - .rename({"rolling_mean": "rolling_av_reward"}) - ) - - -def _plot_benchmark_metadata( - benchmark_metadata_dict: Dict, - title: Optional[str] = None, - subtitle: Optional[str] = 
None, -) -> Figure: - if title: - if subtitle: - title = f"{title}
{subtitle}" - else: - if subtitle: - title = subtitle - - config = get_plotly_config() - layout = go.Layout( - autosize=config["size"]["auto_size"], - width=config["size"]["width"], - height=config["size"]["height"], - ) - # Create the line graph with a colored line - fig = go.Figure(layout=layout) - fig.update_layout(template=config["template"]) - - for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items(): - df = _get_df_from_episode_av_reward_dict(av_reward_dict) - fig.add_trace( - go.Scatter( - x=df["episode"], - y=df["av_reward"], - mode="lines", - name=f"Session {session}", - opacity=0.25, - line={"color": "#a6a6a6"}, - ) - ) - - df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"]) - fig.add_trace( - go.Scatter( - x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"} - ) - ) - - fig.add_trace( - go.Scatter( - x=df["episode"], - y=df["rolling_av_reward"], - mode="lines", - name="Rolling Av (Combined Session Av)", - line={"color": "#4CBB17"}, - ) - ) - - # Set the layout of the graph - fig.update_layout( - xaxis={ - "title": "Episode", - "type": "linear", - }, - yaxis={"title": "Average Reward"}, - title=title, - ) - - return fig - - -def _plot_all_benchmarks_combined_session_av() -> Figure: +def _get_benchmark_primaite_environment() -> BenchmarkPrimaiteGymEnv: """ - Plot the Benchmark results for each released version of PrimAITE. + Create an instance of the BenchmarkPrimaiteGymEnv. - Does this by iterating over the ``benchmark/results`` directory and - extracting the benchmark metadata json for each version that has been - benchmarked. The combined_av_reward_per_episode is extracted from each, - converted into a polars dataframe, and plotted as a scatter line in plotly. + This environment will be used to train the agents on. 
""" - title = "PrimAITE Versions Learning Benchmark" - subtitle = "Rolling Av (Combined Session Av)" - if title: - if subtitle: - title = f"{title}
{subtitle}" - else: - if subtitle: - title = subtitle - config = get_plotly_config() - layout = go.Layout( - autosize=config["size"]["auto_size"], - width=config["size"]["width"], - height=config["size"]["height"], - ) - # Create the line graph with a colored line - fig = go.Figure(layout=layout) - fig.update_layout(template=config["template"]) - - for dir in _RESULTS_ROOT.iterdir(): - if dir.is_dir(): - metadata_file = dir / f"{dir.name}_benchmark_metadata.json" - with open(metadata_file, "r") as file: - metadata_dict = json.load(file) - df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"]) - - fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name)) - - # Set the layout of the graph - fig.update_layout( - xaxis={ - "title": "Episode", - "type": "linear", - }, - yaxis={"title": "Average Reward"}, - title=title, - ) - fig["data"][0]["showlegend"] = True - - return fig + env = BenchmarkPrimaiteGymEnv(env_config=data_manipulation_config_path()) + return env -def run() -> None: +def _prepare_session_directory(): + """Prepare the session directory so that it is easier to clean up after the benchmarking is done.""" + # override session path + session_path = _BENCHMARK_ROOT / "sessions" + + if session_path.is_dir(): + shutil.rmtree(session_path) + + primaite.PRIMAITE_PATHS.user_sessions_path = session_path + primaite.PRIMAITE_PATHS.user_sessions_path.mkdir(exist_ok=True, parents=True) + + +def run( + number_of_sessions: int = 2, + num_episodes: int = 5, + episode_len: int = 128, + n_steps: int = 1280, + batch_size: int = 32, + learning_rate: float = 3e-4, +) -> None: """Run the PrimAITE benchmark.""" - start_datetime = datetime.now() - av_reward_per_episode_dicts = {} - for i in range(1, 11): + benchmark_start_time = datetime.now() + + session_metadata_dict = {} + + _prepare_session_directory() + + # run training + for i in range(1, number_of_sessions + 1): print(f"Starting Benchmark Session: 
{i}") - with _get_benchmark_primaite_session() as session: - session.learn() - av_reward_per_episode_dicts[i] = session.learn_metadata_dict() - benchmark_metadata = _build_benchmark_results_dict( - start_datetime=start_datetime, metadata_dict=av_reward_per_episode_dicts + with _get_benchmark_primaite_environment() as gym_env: + session = BenchmarkSession( + gym_env=gym_env, + num_episodes=num_episodes, + n_steps=n_steps, + episode_len=episode_len, + batch_size=batch_size, + learning_rate=learning_rate, + ) + session.train() + + # Dump the session metadata so that we're not holding it in memory as it's large + with open(_SESSION_METADATA_ROOT / f"{i}.json", "w") as file: + json.dump(session.session_metadata, file, indent=4) + + for i in range(1, number_of_sessions + 1): + with open(_SESSION_METADATA_ROOT / f"{i}.json", "r") as file: + session_metadata_dict[i] = json.load(file) + # generate report + build_benchmark_latex_report( + benchmark_start_time=benchmark_start_time, + session_metadata=session_metadata_dict, + config_path=data_manipulation_config_path(), + results_root_path=_RESULTS_ROOT, ) - v_str = f"v{primaite.__version__}" - - version_result_dir = _RESULTS_ROOT / v_str - if version_result_dir.exists(): - shutil.rmtree(version_result_dir) - version_result_dir.mkdir(exist_ok=True, parents=True) - - with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file: - json.dump(benchmark_metadata, file, indent=4) - title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark" - fig = _plot_benchmark_metadata(benchmark_metadata, title=title) - this_version_plot_path = version_result_dir / f"{title}.png" - fig.write_image(this_version_plot_path) - - fig = _plot_all_benchmarks_combined_session_av() - - all_version_plot_path = _RESULTS_ROOT / "PrimAITE Versions Learning Benchmark.png" - fig.write_image(all_version_plot_path) - - _build_benchmark_latex_report(benchmark_metadata, this_version_plot_path, all_version_plot_path) if __name__ == 
"__main__": diff --git a/benchmark/report.py b/benchmark/report.py new file mode 100644 index 00000000..7d4c27a4 --- /dev/null +++ b/benchmark/report.py @@ -0,0 +1,305 @@ +# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK +import json +import sys +from datetime import datetime +from pathlib import Path +from typing import Dict, Optional + +import plotly.graph_objects as go +import polars as pl +import yaml +from plotly.graph_objs import Figure +from pylatex import Command, Document +from pylatex import Figure as LatexFigure +from pylatex import Section, Subsection, Tabular +from pylatex.utils import bold +from utils import _get_system_info + +import primaite + +PLOT_CONFIG = { + "size": {"auto_size": False, "width": 1500, "height": 900}, + "template": "plotly_white", + "range_slider": False, +} + + +def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict, config: Dict) -> dict: + num_sessions = len(metadata_dict) # number of sessions + + averaged_data = { + "start_timestamp": start_datetime.isoformat(), + "end_datetime": datetime.now().isoformat(), + "primaite_version": primaite.__version__, + "system_info": _get_system_info(), + "total_sessions": num_sessions, + "total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()), + "total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()), + "av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / num_sessions, + "av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / num_sessions, + "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) + / num_sessions, + "combined_av_reward_per_episode": {}, + "session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()}, + "config": config, + } + + # find the average of each episode across all sessions + episodes = metadata_dict[1]["av_reward_per_episode"].keys() + + for episode in 
episodes: + combined_av_reward = ( + sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / num_sessions + ) + averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward + + return averaged_data + + +def _get_df_from_episode_av_reward_dict(data: Dict) -> pl.DataFrame: + data: Dict = {"episode": data.keys(), "av_reward": data.values()} + + return ( + pl.from_dict(data) + .with_columns(rolling_mean=pl.col("av_reward").rolling_mean(window_size=25)) + .rename({"rolling_mean": "rolling_av_reward"}) + ) + + +def _plot_benchmark_metadata( + benchmark_metadata_dict: Dict, + title: Optional[str] = None, + subtitle: Optional[str] = None, +) -> Figure: + if title: + if subtitle: + title = f"{title}
{subtitle}" + else: + if subtitle: + title = subtitle + + layout = go.Layout( + autosize=PLOT_CONFIG["size"]["auto_size"], + width=PLOT_CONFIG["size"]["width"], + height=PLOT_CONFIG["size"]["height"], + ) + # Create the line graph with a colored line + fig = go.Figure(layout=layout) + fig.update_layout(template=PLOT_CONFIG["template"]) + + for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items(): + df = _get_df_from_episode_av_reward_dict(av_reward_dict) + fig.add_trace( + go.Scatter( + x=df["episode"], + y=df["av_reward"], + mode="lines", + name=f"Session {session}", + opacity=0.25, + line={"color": "#a6a6a6"}, + ) + ) + + df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"]) + fig.add_trace( + go.Scatter( + x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"} + ) + ) + + fig.add_trace( + go.Scatter( + x=df["episode"], + y=df["rolling_av_reward"], + mode="lines", + name="Rolling Av (Combined Session Av)", + line={"color": "#4CBB17"}, + ) + ) + + # Set the layout of the graph + fig.update_layout( + xaxis={ + "title": "Episode", + "type": "linear", + }, + yaxis={"title": "Total Reward"}, + title=title, + ) + + return fig + + +def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure: + """ + Plot the Benchmark results for each released version of PrimAITE. + + Does this by iterating over the ``benchmark/results`` directory and + extracting the benchmark metadata json for each version that has been + benchmarked. The combined_av_reward_per_episode is extracted from each, + converted into a polars dataframe, and plotted as a scatter line in plotly. + """ + major_v = primaite.__version__.split(".")[0] + title = f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*" + subtitle = "Rolling Av (Combined Session Av)" + if title: + if subtitle: + title = f"{title}
{subtitle}" + else: + if subtitle: + title = subtitle + layout = go.Layout( + autosize=PLOT_CONFIG["size"]["auto_size"], + width=PLOT_CONFIG["size"]["width"], + height=PLOT_CONFIG["size"]["height"], + ) + # Create the line graph with a colored line + fig = go.Figure(layout=layout) + fig.update_layout(template=PLOT_CONFIG["template"]) + + for dir in results_directory.iterdir(): + if dir.is_dir(): + metadata_file = dir / f"{dir.name}_benchmark_metadata.json" + with open(metadata_file, "r") as file: + metadata_dict = json.load(file) + df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"]) + + fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name)) + + # Set the layout of the graph + fig.update_layout( + xaxis={ + "title": "Episode", + "type": "linear", + }, + yaxis={"title": "Total Reward"}, + title=title, + ) + fig["data"][0]["showlegend"] = True + + return fig + + +def build_benchmark_latex_report( + benchmark_start_time: datetime, session_metadata: Dict, config_path: Path, results_root_path: Path +) -> None: + """Generates a latex report of the benchmark run.""" + # generate report folder + v_str = f"v{primaite.__version__}" + + version_result_dir = results_root_path / v_str + version_result_dir.mkdir(exist_ok=True, parents=True) + + # load the config file as dict + with open(config_path, "r") as f: + cfg_data = yaml.safe_load(f) + + # generate the benchmark metadata dict + benchmark_metadata_dict = _build_benchmark_results_dict( + start_datetime=benchmark_start_time, metadata_dict=session_metadata, config=cfg_data + ) + major_v = primaite.__version__.split(".")[0] + with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file: + json.dump(benchmark_metadata_dict, file, indent=4) + title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark" + fig = _plot_benchmark_metadata(benchmark_metadata_dict, title=title) + this_version_plot_path = version_result_dir / 
f"{title}.png" + fig.write_image(this_version_plot_path) + + fig = _plot_all_benchmarks_combined_session_av(results_directory=results_root_path) + + all_version_plot_path = results_root_path / "PrimAITE Versions Learning Benchmark.png" + fig.write_image(all_version_plot_path) + + geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"} + data = benchmark_metadata_dict + primaite_version = data["primaite_version"] + + # Create a new document + doc = Document("report", geometry_options=geometry_options) + # Title + doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark")) + doc.preamble.append(Command("author", "PrimAITE Dev Team")) + doc.preamble.append(Command("date", datetime.now().date())) + doc.append(Command("maketitle")) + + sessions = data["total_sessions"] + episodes = session_metadata[1]["total_episodes"] - 1 + steps = data["config"]["game"]["max_episode_length"] + + # Body + with doc.create(Section("Introduction")): + doc.append( + f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics " + f"were captured to be referenced during system-level testing and user acceptance testing (UAT)." + ) + doc.append( + f"\nThe benchmarking process consists of running {sessions} training session using the same " + f"config file. Each session trains an agent for {episodes} episodes, " + f"with each episode consisting of {steps} steps." + ) + doc.append( + f"\nThe total reward per episode from each session is captured. This is then used to calculate an " + f"average total reward per episode from the {sessions} individual sessions for smoothing. " + f"Finally, a 25-window rolling average of the average total reward per session is calculated for " + f"further smoothing." 
+ ) + + with doc.create(Section("System Information")): + with doc.create(Subsection("Python")): + with doc.create(Tabular("|l|l|")) as table: + table.add_hline() + table.add_row((bold("Version"), sys.version)) + table.add_hline() + for section, section_data in data["system_info"].items(): + if section_data: + with doc.create(Subsection(section)): + if isinstance(section_data, dict): + with doc.create(Tabular("|l|l|")) as table: + table.add_hline() + for key, value in section_data.items(): + table.add_row((bold(key), value)) + table.add_hline() + elif isinstance(section_data, list): + headers = section_data[0].keys() + tabs_str = "|".join(["l" for _ in range(len(headers))]) + tabs_str = f"|{tabs_str}|" + with doc.create(Tabular(tabs_str)) as table: + table.add_hline() + table.add_row([bold(h) for h in headers]) + table.add_hline() + for item in section_data: + table.add_row(item.values()) + table.add_hline() + + headers_map = { + "total_sessions": "Total Sessions", + "total_episodes": "Total Episodes", + "total_time_steps": "Total Steps", + "av_s_per_session": "Av Session Duration (s)", + "av_s_per_step": "Av Step Duration (s)", + "av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)", + } + with doc.create(Section("Stats")): + with doc.create(Subsection("Benchmark Results")): + with doc.create(Tabular("|l|l|")) as table: + table.add_hline() + for section, header in headers_map.items(): + if section.startswith("av_"): + table.add_row((bold(header), f"{data[section]:.4f}")) + else: + table.add_row((bold(header), data[section])) + table.add_hline() + + with doc.create(Section("Graphs")): + with doc.create(Subsection(f"v{primaite_version} Learning Benchmark Plot")): + with doc.create(LatexFigure(position="h!")) as pic: + pic.add_image(str(this_version_plot_path)) + pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot") + + with doc.create(Subsection(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")): + 
with doc.create(LatexFigure(position="h!")) as pic: + pic.add_image(str(all_version_plot_path)) + pic.add_caption(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*") + + doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True) diff --git a/benchmark/results/PrimAITE Versions Learning Benchmark.png b/benchmark/results/v2/PrimAITE Versions Learning Benchmark.png similarity index 100% rename from benchmark/results/PrimAITE Versions Learning Benchmark.png rename to benchmark/results/v2/PrimAITE Versions Learning Benchmark.png diff --git a/benchmark/results/v2.0.0/PrimAITE v2.0.0 Learning Benchmark.pdf b/benchmark/results/v2/v2.0.0/PrimAITE v2.0.0 Learning Benchmark.pdf similarity index 100% rename from benchmark/results/v2.0.0/PrimAITE v2.0.0 Learning Benchmark.pdf rename to benchmark/results/v2/v2.0.0/PrimAITE v2.0.0 Learning Benchmark.pdf diff --git a/benchmark/results/v2.0.0/PrimAITE v2.0.0 Learning Benchmark.png b/benchmark/results/v2/v2.0.0/PrimAITE v2.0.0 Learning Benchmark.png similarity index 100% rename from benchmark/results/v2.0.0/PrimAITE v2.0.0 Learning Benchmark.png rename to benchmark/results/v2/v2.0.0/PrimAITE v2.0.0 Learning Benchmark.png diff --git a/benchmark/results/v2.0.0/v2.0.0_benchmark_metadata.json b/benchmark/results/v2/v2.0.0/v2.0.0_benchmark_metadata.json similarity index 100% rename from benchmark/results/v2.0.0/v2.0.0_benchmark_metadata.json rename to benchmark/results/v2/v2.0.0/v2.0.0_benchmark_metadata.json diff --git a/benchmark/utils.py b/benchmark/utils.py new file mode 100644 index 00000000..f15c4a12 --- /dev/null +++ b/benchmark/utils.py @@ -0,0 +1,47 @@ +# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK +import platform +from typing import Dict + +import psutil +from GPUtil import GPUtil + + +def get_size(size_bytes: int) -> str: + """ + Scale bytes to its proper format. 
+ + e.g: + 1253656 => '1.20MB' + 1253656678 => '1.17GB' + + : + """ + factor = 1024 + for unit in ["", "K", "M", "G", "T", "P"]: + if size_bytes < factor: + return f"{size_bytes:.2f}{unit}B" + size_bytes /= factor + + +def _get_system_info() -> Dict: + """Builds and returns a dict containing system info.""" + uname = platform.uname() + cpu_freq = psutil.cpu_freq() + virtual_mem = psutil.virtual_memory() + swap_mem = psutil.swap_memory() + gpus = GPUtil.getGPUs() + return { + "System": { + "OS": uname.system, + "OS Version": uname.version, + "Machine": uname.machine, + "Processor": uname.processor, + }, + "CPU": { + "Physical Cores": psutil.cpu_count(logical=False), + "Total Cores": psutil.cpu_count(logical=True), + "Max Frequency": f"{cpu_freq.max:.2f}Mhz", + }, + "Memory": {"Total": get_size(virtual_mem.total), "Swap Total": get_size(swap_mem.total)}, + "GPU": [{"Name": gpu.name, "Total Memory": f"{gpu.memoryTotal}MB"} for gpu in gpus], + } diff --git a/pyproject.toml b/pyproject.toml index d01299be..290720bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "numpy==1.23.5", "platformdirs==3.5.1", "plotly==5.15.0", - "polars==0.18.4", + "polars==0.20.30", "prettytable==3.8.0", "PyYAML==6.0", "typer[all]==0.9.0", diff --git a/src/primaite/VERSION b/src/primaite/VERSION index 9e0b71d0..4a36342f 100644 --- a/src/primaite/VERSION +++ b/src/primaite/VERSION @@ -1 +1 @@ -3.0.0b9 +3.0.0 diff --git a/src/primaite/session/environment.py b/src/primaite/session/environment.py index 477efa9b..1c605f59 100644 --- a/src/primaite/session/environment.py +++ b/src/primaite/session/environment.py @@ -37,6 +37,8 @@ class PrimaiteGymEnv(gymnasium.Env): """Name of the RL agent. 
Since there should only be one RL agent we can just pull the first and only key.""" self.episode_counter: int = 0 """Current episode number.""" + self.average_reward_per_episode: Dict[int, float] = {} + """Average rewards of agents per episode.""" @property def agent(self) -> ProxyAgent: @@ -89,6 +91,8 @@ class PrimaiteGymEnv(gymnasium.Env): f"Resetting environment, episode {self.episode_counter}, " f"avg. reward: {self.agent.reward_function.total_reward}" ) + self.average_reward_per_episode[self.episode_counter] = self.agent.reward_function.total_reward + if self.io.settings.save_agent_actions: all_agent_actions = {name: agent.history for name, agent in self.game.agents.items()} self.io.write_agent_log(agent_actions=all_agent_actions, episode=self.episode_counter)