diff --git a/.gitignore b/.gitignore index c3d54ada..4bb125da 100644 --- a/.gitignore +++ b/.gitignore @@ -164,3 +164,6 @@ src/primaite/notebooks/scratch.py sandbox.py sandbox/ sandbox.ipynb + +# benchmarking +**/benchmark_session/ diff --git a/benchmark/primaite_benchmark.py b/benchmark/primaite_benchmark.py index 226bb71e..c68d5a68 100644 --- a/benchmark/primaite_benchmark.py +++ b/benchmark/primaite_benchmark.py @@ -1,37 +1,19 @@ -# flake8: noqa -raise DeprecationWarning( - "Benchmarking depends on deprecated functionality and it has not been updated to primaite v3 yet." -) # © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK -import json -import platform import shutil -import sys from datetime import datetime from pathlib import Path -from typing import Any, Dict, Final, Optional, Tuple, Union -from unittest.mock import patch +from typing import Any, Dict, Final, Tuple -import GPUtil -import plotly.graph_objects as go -import polars as pl -import psutil -import yaml -from plotly.graph_objs import Figure -from pylatex import Command, Document -from pylatex import Figure as LatexFigure -from pylatex import Section, Subsection, Tabular -from pylatex.utils import bold +from stable_baselines3 import PPO import primaite -from primaite.config.lay_down_config import data_manipulation_config_path -from primaite.data_viz.session_plots import get_plotly_config -from primaite.environment.primaite_env import Primaite -from primaite.primaite_session import PrimaiteSession +from benchmark.utils.benchmark import BenchmarkPrimaiteGymEnv +from benchmark.utils.report import build_benchmark_latex_report +from primaite.config.load import data_manipulation_config_path _LOGGER = primaite.getLogger(__name__) -_BENCHMARK_ROOT = Path(__file__).parent +_BENCHMARK_ROOT = Path(__file__).parent / "benchmark_session" _RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results" _RESULTS_ROOT.mkdir(exist_ok=True, parents=True) @@ -41,171 +23,51 @@ if _OUTPUT_ROOT.exists(): 
shutil.rmtree(_OUTPUT_ROOT) _OUTPUT_ROOT.mkdir() -_TRAINING_CONFIG_PATH = _BENCHMARK_ROOT / "config" / "benchmark_training_config.yaml" -_LAY_DOWN_CONFIG_PATH = data_manipulation_config_path() +class BenchmarkSession: + """Benchmark Session class.""" -def get_size(size_bytes: int) -> str: - """ - Scale bytes to its proper format. + gym_env: BenchmarkPrimaiteGymEnv + """Gym environment used by the session to train.""" - e.g: - 1253656 => '1.20MB' - 1253656678 => '1.17GB' + num_episodes: int + """Number of episodes to run the training session.""" - : - """ - factor = 1024 - for unit in ["", "K", "M", "G", "T", "P"]: - if size_bytes < factor: - return f"{size_bytes:.2f}{unit}B" - size_bytes /= factor + batch_size: int + """Number of steps for each episode.""" + start_time: datetime + """Start time for the session.""" -def _get_system_info() -> Dict: - """Builds and returns a dict containing system info.""" - uname = platform.uname() - cpu_freq = psutil.cpu_freq() - virtual_mem = psutil.virtual_memory() - swap_mem = psutil.swap_memory() - gpus = GPUtil.getGPUs() - return { - "System": { - "OS": uname.system, - "OS Version": uname.version, - "Machine": uname.machine, - "Processor": uname.processor, - }, - "CPU": { - "Physical Cores": psutil.cpu_count(logical=False), - "Total Cores": psutil.cpu_count(logical=True), - "Max Frequency": f"{cpu_freq.max:.2f}Mhz", - }, - "Memory": {"Total": get_size(virtual_mem.total), "Swap Total": get_size(swap_mem.total)}, - "GPU": [{"Name": gpu.name, "Total Memory": f"{gpu.memoryTotal}MB"} for gpu in gpus], - } + end_time: datetime + """End time for the session.""" + session_metadata: Dict + """Dict containing the metadata for the session - used to generate benchmark report.""" -def _build_benchmark_latex_report( - benchmark_metadata_dict: Dict, this_version_plot_path: Path, all_version_plot_path: Path -) -> None: - geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"} - data = 
benchmark_metadata_dict - primaite_version = data["primaite_version"] + def __init__(self, gym_env: BenchmarkPrimaiteGymEnv, num_episodes: int, batch_size: int): + """Initialise the BenchmarkSession.""" + self.gym_env = gym_env + self.num_episodes = num_episodes + self.batch_size = batch_size - # Create a new document - doc = Document("report", geometry_options=geometry_options) - # Title - doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark")) - doc.preamble.append(Command("author", "PrimAITE Dev Team")) - doc.preamble.append(Command("date", datetime.now().date())) - doc.append(Command("maketitle")) + def train(self): + """Run the training session.""" + # start timer for session + self.start_time = datetime.now() - sessions = data["total_sessions"] - episodes = data["training_config"]["num_train_episodes"] - steps = data["training_config"]["num_train_steps"] - - # Body - with doc.create(Section("Introduction")): - doc.append( - f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics " - f"were captured to be referenced during system-level testing and user acceptance testing (UAT)." - ) - doc.append( - f"\nThe benchmarking process consists of running {sessions} training session using the same " - f"training and lay down config files. Each session trains an agent for {episodes} episodes, " - f"with each episode consisting of {steps} steps." - ) - doc.append( - f"\nThe mean reward per episode from each session is captured. This is then used to calculate a " - f"combined average reward per episode from the {sessions} individual sessions for smoothing. " - f"Finally, a 25-widow rolling average of the combined average reward per session is calculated for " - f"further smoothing." 
+ model = PPO( + policy="MlpPolicy", + env=self.gym_env, + batch_size=self.batch_size, + n_steps=self.batch_size * self.num_episodes, ) + model.learn(total_timesteps=self.batch_size * self.num_episodes) - with doc.create(Section("System Information")): - with doc.create(Subsection("Python")): - with doc.create(Tabular("|l|l|")) as table: - table.add_hline() - table.add_row((bold("Version"), sys.version)) - table.add_hline() - for section, section_data in data["system_info"].items(): - if section_data: - with doc.create(Subsection(section)): - if isinstance(section_data, dict): - with doc.create(Tabular("|l|l|")) as table: - table.add_hline() - for key, value in section_data.items(): - table.add_row((bold(key), value)) - table.add_hline() - elif isinstance(section_data, list): - headers = section_data[0].keys() - tabs_str = "|".join(["l" for _ in range(len(headers))]) - tabs_str = f"|{tabs_str}|" - with doc.create(Tabular(tabs_str)) as table: - table.add_hline() - table.add_row([bold(h) for h in headers]) - table.add_hline() - for item in section_data: - table.add_row(item.values()) - table.add_hline() + # end timer for session + self.end_time = datetime.now() - headers_map = { - "total_sessions": "Total Sessions", - "total_episodes": "Total Episodes", - "total_time_steps": "Total Steps", - "av_s_per_session": "Av Session Duration (s)", - "av_s_per_step": "Av Step Duration (s)", - "av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)", - } - with doc.create(Section("Stats")): - with doc.create(Subsection("Benchmark Results")): - with doc.create(Tabular("|l|l|")) as table: - table.add_hline() - for section, header in headers_map.items(): - if section.startswith("av_"): - table.add_row((bold(header), f"{data[section]:.4f}")) - else: - table.add_row((bold(header), data[section])) - table.add_hline() - - with doc.create(Section("Graphs")): - with doc.create(Subsection(f"PrimAITE {primaite_version} Learning Benchmark Plot")): - with 
doc.create(LatexFigure(position="h!")) as pic: - pic.add_image(str(this_version_plot_path)) - pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot") - - with doc.create(Subsection("PrimAITE All Versions Learning Benchmark Plot")): - with doc.create(LatexFigure(position="h!")) as pic: - pic.add_image(str(all_version_plot_path)) - pic.add_caption("PrimAITE All Versions Learning Benchmark Plot") - - doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True) - - -class BenchmarkPrimaiteSession(PrimaiteSession): - """A benchmarking primaite session.""" - - def __init__( - self, - training_config_path: Union[str, Path], - lay_down_config_path: Union[str, Path], - ) -> None: - super().__init__(training_config_path, lay_down_config_path) - self.setup() - - @property - def env(self) -> Primaite: - """Direct access to the env for ease of testing.""" - return self._agent_session._env # noqa - - def __enter__(self) -> "BenchmarkPrimaiteSession": - return self - - # TODO: typehints uncertain - def __exit__(self, type: Any, value: Any, tb: Any) -> None: - shutil.rmtree(self.session_path) - _LOGGER.debug(f"Deleted benchmark session directory: {self.session_path}") + self.session_metadata = self.generate_learn_metadata_dict() def _learn_benchmark_durations(self) -> Tuple[float, float, float]: """ @@ -219,235 +81,78 @@ class BenchmarkPrimaiteSession(PrimaiteSession): :return: The learning benchmark durations as a Tuple of three floats: Tuple[total_s, s_per_step, s_per_100_steps_10_nodes]. 
""" - data = self.metadata_file_as_dict() - start_dt = datetime.fromisoformat(data["start_datetime"]) - end_dt = datetime.fromisoformat(data["end_datetime"]) - delta = end_dt - start_dt + delta = self.end_time - self.start_time total_s = delta.total_seconds() - total_steps = data["learning"]["total_time_steps"] + total_steps = self.batch_size * self.num_episodes s_per_step = total_s / total_steps - num_nodes = self.env.num_nodes + num_nodes = len(self.gym_env.game.simulation.network.nodes) num_intervals = total_steps / 100 av_interval_time = total_s / num_intervals s_per_100_steps_10_nodes = av_interval_time / (num_nodes / 10) return total_s, s_per_step, s_per_100_steps_10_nodes - def learn_metadata_dict(self) -> Dict[str, Any]: + def generate_learn_metadata_dict(self) -> Dict[str, Any]: """Metadata specific to the learning session.""" total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations() + self.gym_env.average_reward_per_episode.pop(0) # remove episode 0 return { - "total_episodes": self.env.actual_episode_count, - "total_time_steps": self.env.total_step_count, + "total_episodes": self.gym_env.episode_counter, + "total_time_steps": self.gym_env.total_time_steps, "total_s": total_s, "s_per_step": s_per_step, "s_per_100_steps_10_nodes": s_per_100_steps_10_nodes, - "av_reward_per_episode": self.learn_av_reward_per_episode_dict(), + "av_reward_per_episode": self.gym_env.average_reward_per_episode, } -def _get_benchmark_session_path(session_timestamp: datetime) -> Path: - return _OUTPUT_ROOT / session_timestamp.strftime("%Y-%m-%d_%H-%M-%S") - - -def _get_benchmark_primaite_session() -> BenchmarkPrimaiteSession: - with patch("primaite.agents.agent_abc.get_session_path", _get_benchmark_session_path) as mck: - mck.session_timestamp = datetime.now() - return BenchmarkPrimaiteSession(_TRAINING_CONFIG_PATH, _LAY_DOWN_CONFIG_PATH) - - -def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict) -> dict: - n = len(metadata_dict) 
- with open(_TRAINING_CONFIG_PATH, "r") as file: - training_config_dict = yaml.safe_load(file) - with open(_LAY_DOWN_CONFIG_PATH, "r") as file: - lay_down_config_dict = yaml.safe_load(file) - averaged_data = { - "start_timestamp": start_datetime.isoformat(), - "end_datetime": datetime.now().isoformat(), - "primaite_version": primaite.__version__, - "system_info": _get_system_info(), - "total_sessions": n, - "total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()), - "total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()), - "av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / n, - "av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / n, - "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) / n, - "combined_av_reward_per_episode": {}, - "session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()}, - "training_config": training_config_dict, - "lay_down_config": lay_down_config_dict, - } - - episodes = metadata_dict[1]["av_reward_per_episode"].keys() - - for episode in episodes: - combined_av_reward = sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / n - averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward - - return averaged_data - - -def _get_df_from_episode_av_reward_dict(data: Dict) -> pl.DataFrame: - data: Dict = {"episode": data.keys(), "av_reward": data.values()} - - return ( - pl.from_dict(data) - .with_columns(rolling_mean=pl.col("av_reward").rolling_mean(window_size=25)) - .rename({"rolling_mean": "rolling_av_reward"}) - ) - - -def _plot_benchmark_metadata( - benchmark_metadata_dict: Dict, - title: Optional[str] = None, - subtitle: Optional[str] = None, -) -> Figure: - if title: - if subtitle: - title = f"{title}
{subtitle}" - else: - if subtitle: - title = subtitle - - config = get_plotly_config() - layout = go.Layout( - autosize=config["size"]["auto_size"], - width=config["size"]["width"], - height=config["size"]["height"], - ) - # Create the line graph with a colored line - fig = go.Figure(layout=layout) - fig.update_layout(template=config["template"]) - - for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items(): - df = _get_df_from_episode_av_reward_dict(av_reward_dict) - fig.add_trace( - go.Scatter( - x=df["episode"], - y=df["av_reward"], - mode="lines", - name=f"Session {session}", - opacity=0.25, - line={"color": "#a6a6a6"}, - ) - ) - - df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"]) - fig.add_trace( - go.Scatter( - x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"} - ) - ) - - fig.add_trace( - go.Scatter( - x=df["episode"], - y=df["rolling_av_reward"], - mode="lines", - name="Rolling Av (Combined Session Av)", - line={"color": "#4CBB17"}, - ) - ) - - # Set the layout of the graph - fig.update_layout( - xaxis={ - "title": "Episode", - "type": "linear", - }, - yaxis={"title": "Average Reward"}, - title=title, - ) - - return fig - - -def _plot_all_benchmarks_combined_session_av() -> Figure: +def _get_benchmark_primaite_environment() -> BenchmarkPrimaiteGymEnv: """ - Plot the Benchmark results for each released version of PrimAITE. + Create an instance of the BenchmarkPrimaiteGymEnv. - Does this by iterating over the ``benchmark/results`` directory and - extracting the benchmark metadata json for each version that has been - benchmarked. The combined_av_reward_per_episode is extracted from each, - converted into a polars dataframe, and plotted as a scatter line in plotly. + This environment will be used to train the agents on. 
""" - title = "PrimAITE Versions Learning Benchmark" - subtitle = "Rolling Av (Combined Session Av)" - if title: - if subtitle: - title = f"{title}
{subtitle}" - else: - if subtitle: - title = subtitle - config = get_plotly_config() - layout = go.Layout( - autosize=config["size"]["auto_size"], - width=config["size"]["width"], - height=config["size"]["height"], - ) - # Create the line graph with a colored line - fig = go.Figure(layout=layout) - fig.update_layout(template=config["template"]) - - for dir in _RESULTS_ROOT.iterdir(): - if dir.is_dir(): - metadata_file = dir / f"{dir.name}_benchmark_metadata.json" - with open(metadata_file, "r") as file: - metadata_dict = json.load(file) - df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"]) - - fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name)) - - # Set the layout of the graph - fig.update_layout( - xaxis={ - "title": "Episode", - "type": "linear", - }, - yaxis={"title": "Average Reward"}, - title=title, - ) - fig["data"][0]["showlegend"] = True - - return fig + return BenchmarkPrimaiteGymEnv(env_config=data_manipulation_config_path()) -def run() -> None: +def _prepare_session_directory(): + """Prepare the session directory so that it is easier to clean up after the benchmarking is done.""" + # override session path + session_path = _BENCHMARK_ROOT / "sessions" + + if session_path.is_dir(): + shutil.rmtree(session_path) + + primaite.PRIMAITE_PATHS.user_sessions_path = session_path + primaite.PRIMAITE_PATHS.user_sessions_path.mkdir(exist_ok=True, parents=True) + + +def run(number_of_sessions: int = 1, num_episodes: int = 3, batch_size: int = 128) -> None: # 10 # 1000 # 256 """Run the PrimAITE benchmark.""" - start_datetime = datetime.now() - av_reward_per_episode_dicts = {} - for i in range(1, 11): + benchmark_start_time = datetime.now() + + session_metadata_dict = {} + + _prepare_session_directory() + + # run training + for i in range(1, number_of_sessions + 1): print(f"Starting Benchmark Session: {i}") - with _get_benchmark_primaite_session() as session: - session.learn() - 
av_reward_per_episode_dicts[i] = session.learn_metadata_dict() - benchmark_metadata = _build_benchmark_results_dict( - start_datetime=start_datetime, metadata_dict=av_reward_per_episode_dicts + with _get_benchmark_primaite_environment() as gym_env: + session = BenchmarkSession(gym_env=gym_env, num_episodes=num_episodes, batch_size=batch_size) + session.train() + session_metadata_dict[i] = session.session_metadata + + # generate report + build_benchmark_latex_report( + benchmark_start_time=benchmark_start_time, + session_metadata=session_metadata_dict, + config_path=data_manipulation_config_path(), + results_root_path=_RESULTS_ROOT, ) - v_str = f"v{primaite.__version__}" - - version_result_dir = _RESULTS_ROOT / v_str - if version_result_dir.exists(): - shutil.rmtree(version_result_dir) - version_result_dir.mkdir(exist_ok=True, parents=True) - - with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file: - json.dump(benchmark_metadata, file, indent=4) - title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark" - fig = _plot_benchmark_metadata(benchmark_metadata, title=title) - this_version_plot_path = version_result_dir / f"{title}.png" - fig.write_image(this_version_plot_path) - - fig = _plot_all_benchmarks_combined_session_av() - - all_version_plot_path = _RESULTS_ROOT / "PrimAITE Versions Learning Benchmark.png" - fig.write_image(all_version_plot_path) - - _build_benchmark_latex_report(benchmark_metadata, this_version_plot_path, all_version_plot_path) if __name__ == "__main__": diff --git a/benchmark/utils/benchmark.py b/benchmark/utils/benchmark.py new file mode 100644 index 00000000..fc457a03 --- /dev/null +++ b/benchmark/utils/benchmark.py @@ -0,0 +1,122 @@ +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +from gymnasium.core import ObsType + +from primaite.session.environment import PrimaiteGymEnv + + +class BenchmarkPrimaiteGymEnv(PrimaiteGymEnv): + """ + Class that extends the 
PrimaiteGymEnv. + + The reset method is extended so that the average rewards per episode are recorded. + """ + + total_time_steps: int = 0 + + def reset(self, seed: Optional[int] = None) -> Tuple[ObsType, Dict[str, Any]]: + """Overrides the PrimAITEGymEnv reset so that the total timesteps is saved.""" + self.total_time_steps += self.game.step_counter + + return super().reset(seed=seed) + + +##################################### +# IGNORE BELOW FOR NOW +##################################### + + +class BenchMarkOSInfo: + """Operating System Information about the machine that run the benchmark.""" + + operating_system: str + """The operating system the benchmark was run on.""" + + operating_system_version: str + """The operating system version the benchmark was run on.""" + + machine: str + """The type of machine running the benchmark.""" + + processor: str + """The processor used to run the benchmark.""" + + +class BenchMarkCPUInfo: + """CPU Information of the machine that ran the benchmark.""" + + physical_cores: int + """The number of CPU cores the machine that ran the benchmark had.""" + + total_cores: int + """The number of total cores the machine that run the benchmark had.""" + + max_frequency: int + """The CPU's maximum clock speed.""" + + +class BenchMarkMemoryInfo: + """The Memory Information of the machine that ran the benchmark.""" + + total: str + """The total amount of memory.""" + + swap_total: str + """Virtual memory.""" + + +class BenchMarkGPUInfo: + """The GPU Information of the machine that ran the benchmark.""" + + name: str + """GPU name.""" + + total_memory: str + """GPU memory.""" + + +class BenchMarkSystemInfo: + """Overall system information of the machine that ran the benchmark.""" + + system: BenchMarkOSInfo + cpu: BenchMarkCPUInfo + memory: BenchMarkMemoryInfo + gpu: List[BenchMarkMemoryInfo] + + +class BenchMarkResult: + """Class containing the relevant benchmark results.""" + + benchmark_start_time: datetime + """Start time of the 
benchmark run.""" + + benchmark_end_time: datetime + """End time of the benchmark run.""" + + primaite_version: str + """The version of PrimAITE being benchmarked.""" + + system_info: BenchMarkSystemInfo + """System information of the machine that ran the benchmark.""" + + total_sessions: int + """The number of sessions that the benchmark ran.""" + + total_episodes: int + """The number of episodes over all the sessions that the benchmark ran.""" + + total_timesteps: int + """The number of timesteps over all the sessions that the benchmark ran.""" + + average_seconds_per_session: float + """The average time per session.""" + + average_seconds_per_step: float + """The average time per step.""" + + average_seconds_per_100_steps_and_10_nodes: float + """The average time per 100 steps on a 10 node network.""" + + combined_average_reward_per_episode: Dict + """tbd.""" diff --git a/benchmark/utils/report.py b/benchmark/utils/report.py new file mode 100644 index 00000000..0b509d37 --- /dev/null +++ b/benchmark/utils/report.py @@ -0,0 +1,304 @@ +# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK +import json +import shutil +import sys +from datetime import datetime +from pathlib import Path +from typing import Dict, Optional + +import plotly.graph_objects as go +import polars as pl +import yaml +from plotly.graph_objs import Figure +from pylatex import Command, Document +from pylatex import Figure as LatexFigure +from pylatex import Section, Subsection, Tabular +from pylatex.utils import bold + +import primaite +from benchmark.utils.utils import _get_system_info + +PLOT_CONFIG = { + "size": {"auto_size": False, "width": 1500, "height": 900}, + "template": "plotly_white", + "range_slider": False, +} + + +def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict, config: Dict) -> dict: + n = len(metadata_dict) + + averaged_data = { + "start_timestamp": start_datetime.isoformat(), + "end_datetime": datetime.now().isoformat(), + 
"primaite_version": primaite.__version__, + "system_info": _get_system_info(), + "total_sessions": n, + "total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()), + "total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()), + "av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / n, + "av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / n, + "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) / n, + "combined_av_reward_per_episode": {}, + "session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()}, + "config": config, + } + + episode_averages = [episode["av_reward_per_episode"] for episode in metadata_dict.values()] + + episode = 0 + for episode_average in episode_averages: + episode += 1 + averaged_data["combined_av_reward_per_episode"][str(episode)] = episode_average + + return averaged_data + + +def _get_df_from_episode_av_reward_dict(data: Dict) -> pl.DataFrame: + data: Dict = {"episode": data.keys(), "av_reward": data.values()} + + return ( + pl.from_dict(data) + .with_columns(rolling_mean=pl.col("av_reward").rolling_mean(window_size=25)) + .rename({"rolling_mean": "rolling_av_reward"}) + ) + + +def _plot_benchmark_metadata( + benchmark_metadata_dict: Dict, + title: Optional[str] = None, + subtitle: Optional[str] = None, +) -> Figure: + if title: + if subtitle: + title = f"{title}
{subtitle}" + else: + if subtitle: + title = subtitle + + layout = go.Layout( + autosize=PLOT_CONFIG["size"]["auto_size"], + width=PLOT_CONFIG["size"]["width"], + height=PLOT_CONFIG["size"]["height"], + ) + # Create the line graph with a colored line + fig = go.Figure(layout=layout) + fig.update_layout(template=PLOT_CONFIG["template"]) + + for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items(): + df = _get_df_from_episode_av_reward_dict(av_reward_dict) + fig.add_trace( + go.Scatter( + x=df["episode"], + y=df["av_reward"], + mode="lines", + name=f"Session {session}", + opacity=0.25, + line={"color": "#a6a6a6"}, + ) + ) + + df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"]) + fig.add_trace( + go.Scatter( + x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"} + ) + ) + + fig.add_trace( + go.Scatter( + x=df["episode"], + y=df["rolling_av_reward"], + mode="lines", + name="Rolling Av (Combined Session Av)", + line={"color": "#4CBB17"}, + ) + ) + + # Set the layout of the graph + fig.update_layout( + xaxis={ + "title": "Episode", + "type": "linear", + }, + yaxis={"title": "Average Reward"}, + title=title, + ) + + return fig + + +def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure: + """ + Plot the Benchmark results for each released version of PrimAITE. + + Does this by iterating over the ``benchmark/results`` directory and + extracting the benchmark metadata json for each version that has been + benchmarked. The combined_av_reward_per_episode is extracted from each, + converted into a polars dataframe, and plotted as a scatter line in plotly. + """ + title = "PrimAITE Versions Learning Benchmark" + subtitle = "Rolling Av (Combined Session Av)" + if title: + if subtitle: + title = f"{title}
{subtitle}" + else: + if subtitle: + title = subtitle + layout = go.Layout( + autosize=PLOT_CONFIG["size"]["auto_size"], + width=PLOT_CONFIG["size"]["width"], + height=PLOT_CONFIG["size"]["height"], + ) + # Create the line graph with a colored line + fig = go.Figure(layout=layout) + fig.update_layout(template=PLOT_CONFIG["template"]) + + for dir in results_directory.iterdir(): + if dir.is_dir(): + metadata_file = dir / f"{dir.name}_benchmark_metadata.json" + with open(metadata_file, "r") as file: + metadata_dict = json.load(file) + df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"]) + + fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name)) + + # Set the layout of the graph + fig.update_layout( + xaxis={ + "title": "Episode", + "type": "linear", + }, + yaxis={"title": "Average Reward"}, + title=title, + ) + fig["data"][0]["showlegend"] = True + + return fig + + +def build_benchmark_latex_report( + benchmark_start_time: datetime, session_metadata: Dict, config_path: Path, results_root_path: Path +) -> None: + """Generates a latex report of the benchmark run.""" + # generate report folder + v_str = f"v{primaite.__version__}" + + version_result_dir = results_root_path / v_str + if version_result_dir.exists(): + shutil.rmtree(version_result_dir) + version_result_dir.mkdir(exist_ok=True, parents=True) + + # load the config file as dict + with open(config_path, "r") as f: + cfg_data = yaml.safe_load(f) + + # generate the benchmark metadata dict + benchmark_metadata_dict = _build_benchmark_results_dict( + start_datetime=benchmark_start_time, metadata_dict=session_metadata, config=cfg_data + ) + + with open(version_result_dir / f"{v_str}_benchmark_metadata.json", "w") as file: + json.dump(benchmark_metadata_dict, file, indent=4) + title = f"PrimAITE v{primaite.__version__.strip()} Learning Benchmark" + fig = _plot_benchmark_metadata(benchmark_metadata_dict, title=title) + 
this_version_plot_path = version_result_dir / f"{title}.png" + fig.write_image(this_version_plot_path) + + fig = _plot_all_benchmarks_combined_session_av(results_root_path) + + all_version_plot_path = results_root_path / "PrimAITE Versions Learning Benchmark.png" + fig.write_image(all_version_plot_path) + + geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"} + data = benchmark_metadata_dict + primaite_version = data["primaite_version"] + + # Create a new document + doc = Document("report", geometry_options=geometry_options) + # Title + doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark")) + doc.preamble.append(Command("author", "PrimAITE Dev Team")) + doc.preamble.append(Command("date", datetime.now().date())) + doc.append(Command("maketitle")) + + sessions = data["total_sessions"] + episodes = data["total_episodes"] // sessions + steps = data["total_time_steps"] // max(data["total_episodes"], 1) + + # Body + with doc.create(Section("Introduction")): + doc.append( + f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics " + f"were captured to be referenced during system-level testing and user acceptance testing (UAT)." + ) + doc.append( + f"\nThe benchmarking process consists of running {sessions} training sessions using the same " + f"training and lay down config files. Each session trains an agent for {episodes} episodes, " + f"with each episode consisting of {steps} steps." + ) + doc.append( + f"\nThe mean reward per episode from each session is captured. This is then used to calculate a " + f"combined average reward per episode from the {sessions} individual sessions for smoothing. " + f"Finally, a 25-window rolling average of the combined average reward per session is calculated for " + f"further smoothing."
+ ) + + with doc.create(Section("System Information")): + with doc.create(Subsection("Python")): + with doc.create(Tabular("|l|l|")) as table: + table.add_hline() + table.add_row((bold("Version"), sys.version)) + table.add_hline() + for section, section_data in data["system_info"].items(): + if section_data: + with doc.create(Subsection(section)): + if isinstance(section_data, dict): + with doc.create(Tabular("|l|l|")) as table: + table.add_hline() + for key, value in section_data.items(): + table.add_row((bold(key), value)) + table.add_hline() + elif isinstance(section_data, list): + headers = section_data[0].keys() + tabs_str = "|".join(["l" for _ in range(len(headers))]) + tabs_str = f"|{tabs_str}|" + with doc.create(Tabular(tabs_str)) as table: + table.add_hline() + table.add_row([bold(h) for h in headers]) + table.add_hline() + for item in section_data: + table.add_row(item.values()) + table.add_hline() + + headers_map = { + "total_sessions": "Total Sessions", + "total_episodes": "Total Episodes", + "total_time_steps": "Total Steps", + "av_s_per_session": "Av Session Duration (s)", + "av_s_per_step": "Av Step Duration (s)", + "av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)", + } + with doc.create(Section("Stats")): + with doc.create(Subsection("Benchmark Results")): + with doc.create(Tabular("|l|l|")) as table: + table.add_hline() + for section, header in headers_map.items(): + if section.startswith("av_"): + table.add_row((bold(header), f"{data[section]:.4f}")) + else: + table.add_row((bold(header), data[section])) + table.add_hline() + + with doc.create(Section("Graphs")): + with doc.create(Subsection(f"PrimAITE {primaite_version} Learning Benchmark Plot")): + with doc.create(LatexFigure(position="h!")) as pic: + pic.add_image(str(this_version_plot_path)) + pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot") + + with doc.create(Subsection("PrimAITE All Versions Learning Benchmark Plot")): + with 
doc.create(LatexFigure(position="h!")) as pic: + pic.add_image(str(all_version_plot_path)) + pic.add_caption("PrimAITE All Versions Learning Benchmark Plot") + + doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True) diff --git a/benchmark/utils/utils.py b/benchmark/utils/utils.py new file mode 100644 index 00000000..f15c4a12 --- /dev/null +++ b/benchmark/utils/utils.py @@ -0,0 +1,47 @@ +# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK +import platform +from typing import Dict + +import psutil +from GPUtil import GPUtil + + +def get_size(size_bytes: int) -> str: + """ + Scale bytes to its proper format. + + e.g: + 1253656 => '1.20MB' + 1253656678 => '1.17GB' + + : + """ + factor = 1024 + for unit in ["", "K", "M", "G", "T", "P"]: + if size_bytes < factor: + return f"{size_bytes:.2f}{unit}B" + size_bytes /= factor + + +def _get_system_info() -> Dict: + """Builds and returns a dict containing system info.""" + uname = platform.uname() + cpu_freq = psutil.cpu_freq() + virtual_mem = psutil.virtual_memory() + swap_mem = psutil.swap_memory() + gpus = GPUtil.getGPUs() + return { + "System": { + "OS": uname.system, + "OS Version": uname.version, + "Machine": uname.machine, + "Processor": uname.processor, + }, + "CPU": { + "Physical Cores": psutil.cpu_count(logical=False), + "Total Cores": psutil.cpu_count(logical=True), + "Max Frequency": f"{cpu_freq.max:.2f}Mhz", + }, + "Memory": {"Total": get_size(virtual_mem.total), "Swap Total": get_size(swap_mem.total)}, + "GPU": [{"Name": gpu.name, "Total Memory": f"{gpu.memoryTotal}MB"} for gpu in gpus], + } diff --git a/pyproject.toml b/pyproject.toml index 9f7eda52..9001cd30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "numpy==1.23.5", "platformdirs==3.5.1", "plotly==5.15.0", - "polars==0.18.4", + "polars==0.20.30", "prettytable==3.8.0", "PyYAML==6.0", "stable-baselines3[extra]==2.1.0", diff --git a/src/primaite/session/environment.py 
b/src/primaite/session/environment.py index 4d0544e9..7af9a75d 100644 --- a/src/primaite/session/environment.py +++ b/src/primaite/session/environment.py @@ -37,6 +37,8 @@ class PrimaiteGymEnv(gymnasium.Env): """Name of the RL agent. Since there should only be one RL agent we can just pull the first and only key.""" self.episode_counter: int = 0 """Current episode number.""" + self.average_reward_per_episode: Dict[int, float] = {} + """Average rewards of agents per episode.""" @property def agent(self) -> ProxyAgent: @@ -89,6 +91,8 @@ class PrimaiteGymEnv(gymnasium.Env): f"Resetting environment, episode {self.episode_counter}, " f"avg. reward: {self.agent.reward_function.total_reward}" ) + self.average_reward_per_episode[self.episode_counter] = self.agent.reward_function.total_reward + if self.io.settings.save_agent_actions: all_agent_actions = {name: agent.action_history for name, agent in self.game.agents.items()} self.io.write_agent_actions(agent_actions=all_agent_actions, episode=self.episode_counter)