PrimAITE/benchmark/primaite_benchmark.py

# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK
import shutil
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Final, Tuple

from report import build_benchmark_latex_report
from stable_baselines3 import PPO

import primaite
from benchmark import BenchmarkPrimaiteGymEnv
from primaite.config.load import data_manipulation_config_path

_LOGGER = primaite.getLogger(__name__)

# All benchmark artefacts are kept in directories that sit next to this script.
_BENCHMARK_ROOT = Path(__file__).parent
_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT.joinpath("results")
_OUTPUT_ROOT: Final[Path] = _BENCHMARK_ROOT.joinpath("output")

# The results directory is only ever created here, never cleared.
_RESULTS_ROOT.mkdir(parents=True, exist_ok=True)

# The output directory is wiped and recreated every time this module is loaded.
if _OUTPUT_ROOT.exists():
    shutil.rmtree(_OUTPUT_ROOT)
_OUTPUT_ROOT.mkdir()


class BenchmarkSession:
    """
    A single timed PPO training run used by the PrimAITE benchmark.

    The session trains a Stable-Baselines3 ``PPO`` model on the supplied environment for
    ``num_episodes * num_steps`` time steps, records the wall-clock start and end times, and derives the timing
    metadata that is used to generate the benchmark report.
    """

    gym_env: BenchmarkPrimaiteGymEnv
    """Gym environment used by the session to train."""

    num_episodes: int
    """Number of episodes to run the training session for."""

    num_steps: int
    """Number of time steps budgeted per episode; the training budget is ``num_episodes * num_steps``."""

    batch_size: int
    """Batch size requested by the caller. Stored on the session but not forwarded to the PPO model."""

    learning_rate: float
    """Learning rate passed to the PPO model."""

    start_time: datetime
    """Start time for the session."""

    end_time: datetime
    """End time for the session."""

    session_metadata: Dict
    """Dict containing the metadata for the session - used to generate benchmark report."""

    def __init__(
        self, gym_env: BenchmarkPrimaiteGymEnv, num_episodes: int, num_steps: int, batch_size: int, learning_rate: float
    ):
        """
        Initialise the BenchmarkSession.

        :param gym_env: Environment the PPO model is trained on.
        :param num_episodes: Number of episodes to train for. Must be positive.
        :param num_steps: Number of time steps budgeted per episode. Must be positive.
        :param batch_size: Batch size requested by the caller (stored only).
        :param learning_rate: Learning rate passed to the PPO model.
        :raises ValueError: If ``num_episodes`` or ``num_steps`` is not positive.
        """
        # Fail before any training starts: a zero step budget would otherwise only surface as a
        # ZeroDivisionError in the timing calculation after the whole training run has finished.
        if num_episodes <= 0 or num_steps <= 0:
            raise ValueError(
                f"num_episodes and num_steps must both be positive, got "
                f"num_episodes={num_episodes}, num_steps={num_steps}"
            )
        self.gym_env = gym_env
        self.num_episodes = num_episodes
        self.num_steps = num_steps
        self.batch_size = batch_size
        self.learning_rate = learning_rate

    def _total_timesteps(self) -> int:
        """Return the training budget in time steps, shared by training and by the timing calculation."""
        return self.num_episodes * self.num_steps

    def train(self):
        """
        Run the training session.

        Trains a PPO model using the session's configured learning rate and time step budget, then stores the
        resulting metadata on ``session_metadata``. The timed interval includes construction of the model.
        """
        # start timer for session
        self.start_time = datetime.now()

        # Use the values the session was configured with, so that the amount of training performed matches
        # the step count used when the durations are calculated.
        model = PPO(
            "MlpPolicy",
            self.gym_env,
            learning_rate=self.learning_rate,
            verbose=0,
            tensorboard_log="./PPO_UC2/",
        )
        model.learn(total_timesteps=self._total_timesteps())

        # end timer for session
        self.end_time = datetime.now()

        self.session_metadata = self.generate_learn_metadata_dict()

    def _learn_benchmark_durations(self) -> Tuple[float, float, float]:
        """
        Calculate and return the learning benchmark durations.

        Calculates the:
        - Total learning time in seconds
        - Total learning time per time step in seconds
        - Total learning time per 100 time steps per 10 nodes in seconds

        The step count is the configured training budget (``num_episodes * num_steps``), and the node count is
        read from the environment's simulated network. ``start_time`` and ``end_time`` must already be set.

        :return: The learning benchmark durations as a Tuple of three floats:
            Tuple[total_s, s_per_step, s_per_100_steps_10_nodes].
        """
        delta = self.end_time - self.start_time
        total_s = delta.total_seconds()

        total_steps = self._total_timesteps()
        s_per_step = total_s / total_steps

        num_nodes = len(self.gym_env.game.simulation.network.nodes)
        # Average time for a block of 100 steps, then scaled to a notional 10-node network.
        num_intervals = total_steps / 100
        av_interval_time = total_s / num_intervals
        s_per_100_steps_10_nodes = av_interval_time / (num_nodes / 10)

        return total_s, s_per_step, s_per_100_steps_10_nodes

    def generate_learn_metadata_dict(self) -> Dict[str, Any]:
        """
        Metadata specific to the learning session.

        Note that this removes the episode 0 entry from the environment's ``average_reward_per_episode``
        in place, so it should be called once per training run.

        :return: Dict holding the episode and time step counts reported by the environment, the three
            benchmark durations, and the average reward per episode.
        """
        total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations()
        self.gym_env.average_reward_per_episode.pop(0)  # remove episode 0
        return {
            "total_episodes": self.gym_env.episode_counter,
            "total_time_steps": self.gym_env.total_time_steps,
            "total_s": total_s,
            "s_per_step": s_per_step,
            "s_per_100_steps_10_nodes": s_per_100_steps_10_nodes,
            "av_reward_per_episode": self.gym_env.average_reward_per_episode,
        }


def _get_benchmark_primaite_environment() -> BenchmarkPrimaiteGymEnv:
    """
    Build a new BenchmarkPrimaiteGymEnv for agents to be trained on.

    The environment is configured with the value returned by ``data_manipulation_config_path()``.

    :return: The newly constructed environment.
    """
    config_path = data_manipulation_config_path()
    return BenchmarkPrimaiteGymEnv(env_config=config_path)


def _prepare_session_directory():
    """
    Point PrimAITE's user sessions path at a fresh ``sessions`` directory inside the benchmark root.

    Any existing ``sessions`` directory is deleted first, so session output is kept in one place that is
    easy to clean up once benchmarking is done.
    """
    sessions_dir = _BENCHMARK_ROOT / "sessions"

    # Start from a clean slate: drop whatever a previous run left behind.
    if sessions_dir.is_dir():
        shutil.rmtree(sessions_dir)

    # Override the session path, then create the directory via the overridden attribute.
    paths = primaite.PRIMAITE_PATHS
    paths.user_sessions_path = sessions_dir
    paths.user_sessions_path.mkdir(parents=True, exist_ok=True)


def run(
    number_of_sessions: int = 5,
    num_episodes: int = 512,
    num_timesteps: int = 128,
    batch_size: int = 128,
    learning_rate: float = 3e-4,
) -> None:
    """
    Run the PrimAITE benchmark and build the LaTeX report.

    Each session is trained in its own newly created environment. Sessions are numbered from 1 and their
    metadata is collected under that number before being handed to the report builder.

    :param number_of_sessions: How many training sessions to run.
    :param num_episodes: Passed to each BenchmarkSession as ``num_episodes``.
    :param num_timesteps: Passed to each BenchmarkSession as ``num_steps``.
    :param batch_size: Passed to each BenchmarkSession as ``batch_size``.
    :param learning_rate: Passed to each BenchmarkSession as ``learning_rate``.
    """
    started_at = datetime.now()

    _prepare_session_directory()

    # Settings that are identical for every session; only the environment differs.
    session_settings = {
        "num_episodes": num_episodes,
        "num_steps": num_timesteps,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
    }

    # run training
    metadata_by_session = {}
    for i in range(1, number_of_sessions + 1):
        print(f"Starting Benchmark Session: {i}")

        with _get_benchmark_primaite_environment() as gym_env:
            session = BenchmarkSession(gym_env=gym_env, **session_settings)
            session.train()
            metadata_by_session[i] = session.session_metadata

    # generate report
    build_benchmark_latex_report(
        benchmark_start_time=started_at,
        session_metadata=metadata_by_session,
        config_path=data_manipulation_config_path(),
        results_root_path=_RESULTS_ROOT,
    )


# Script entry point: run the benchmark with its default arguments when this file is executed directly.
if __name__ == "__main__":
    run()
#1648 - Added header to benchmark files 2023-07-21 15:06:05 +01:00			`# © Crown-owned copyright 2023, Defence Science and Technology Laboratory UK`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`import shutil`
			`from datetime import datetime`
			`from pathlib import Path`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`from typing import Any, Dict, Final, Tuple`

#2628: commit 2024-06-05 11:03:39 +01:00			`from report import build_benchmark_latex_report`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`from stable_baselines3 import PPO`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`import primaite`
#2628: commit 2024-06-05 11:03:39 +01:00			`from benchmark import BenchmarkPrimaiteGymEnv`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`from primaite.config.load import data_manipulation_config_path`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00
			`_LOGGER = primaite.getLogger(__name__)`

#2628: committing to be reviewed 2024-06-01 13:23:27 +01:00			`_BENCHMARK_ROOT = Path(__file__).parent`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00			`_RESULTS_ROOT: Final[Path] = _BENCHMARK_ROOT / "results"`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`_RESULTS_ROOT.mkdir(exist_ok=True, parents=True)`

#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00			`_OUTPUT_ROOT: Final[Path] = _BENCHMARK_ROOT / "output"`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`# Clear and recreate the output directory`
#1632 - Fixed output directory clear bug. Added gputil to dev deps. 2023-07-20 10:28:19 +01:00			`if _OUTPUT_ROOT.exists():`
			`shutil.rmtree(_OUTPUT_ROOT)`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`_OUTPUT_ROOT.mkdir()`

#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: temp commit 2024-05-31 13:47:02 +01:00			`class BenchmarkSession:`
			`"""Benchmark Session class."""`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: temp commit 2024-05-31 13:47:02 +01:00			`gym_env: BenchmarkPrimaiteGymEnv`
			`"""Gym environment used by the session to train."""`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: temp commit 2024-05-31 13:47:02 +01:00			`num_episodes: int`
			`"""Number of episodes to run the training session."""`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: committing to be reviewed 2024-06-01 13:23:27 +01:00			`num_steps: int`
			`"""Number of steps to run the training session."""`

#2628: temp commit 2024-05-31 13:47:02 +01:00			`batch_size: int`
			`"""Number of steps for each episode."""`

#2628: committing to be reviewed 2024-06-01 13:23:27 +01:00			`learning_rate: float`
			`"""Learning rate for the model."""`

#2628: temp commit 2024-05-31 13:47:02 +01:00			`start_time: datetime`
			`"""Start time for the session."""`

			`end_time: datetime`
			`"""End time for the session."""`

			`session_metadata: Dict`
			`"""Dict containing the metadata for the session - used to generate benchmark report."""`

#2628: committing to be reviewed 2024-06-01 13:23:27 +01:00			`def __init__(`
			`self, gym_env: BenchmarkPrimaiteGymEnv, num_episodes: int, num_steps: int, batch_size: int, learning_rate: float`
			`):`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`"""Initialise the BenchmarkSession."""`
			`self.gym_env = gym_env`
			`self.num_episodes = num_episodes`
#2628: committing to be reviewed 2024-06-01 13:23:27 +01:00			`self.num_steps = num_steps`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`self.batch_size = batch_size`
#2628: committing to be reviewed 2024-06-01 13:23:27 +01:00			`self.learning_rate = learning_rate`
#2628: temp commit 2024-05-31 13:47:02 +01:00
			`def train(self):`
			`"""Run the training session."""`
			`# start timer for session`
			`self.start_time = datetime.now()`
#2628: commit 2024-06-05 11:03:39 +01:00			`# TODO check these parameters are correct`
			`# EPISODE_LEN = 10`
			`TOTAL_TIMESTEPS = 131072`
			`LEARNING_RATE = 3e-4`
			`model = PPO("MlpPolicy", self.gym_env, learning_rate=LEARNING_RATE, verbose=0, tensorboard_log="./PPO_UC2/")`
			`model.learn(total_timesteps=TOTAL_TIMESTEPS)`
#2628: temp commit 2024-05-31 13:47:02 +01:00
			`# end timer for session`
			`self.end_time = datetime.now()`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: temp commit 2024-05-31 13:47:02 +01:00			`self.session_metadata = self.generate_learn_metadata_dict()`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`def _learn_benchmark_durations(self) -> Tuple[float, float, float]:`
			`"""`
			`Calculate and return the learning benchmark durations.`

			`Calculates the:`
			`- Total learning time in seconds`
			`- Total learning time per time step in seconds`
			`- Total learning time per 100 time steps per 10 nodes in seconds`

			`:return: The learning benchmark durations as a Tuple of three floats:`
			`Tuple[total_s, s_per_step, s_per_100_steps_10_nodes].`
			`"""`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`delta = self.end_time - self.start_time`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`total_s = delta.total_seconds()`

#2628: temp commit 2024-05-31 13:47:02 +01:00			`total_steps = self.batch_size * self.num_episodes`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`s_per_step = total_s / total_steps`

#2628: temp commit 2024-05-31 13:47:02 +01:00			`num_nodes = len(self.gym_env.game.simulation.network.nodes)`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`num_intervals = total_steps / 100`
			`av_interval_time = total_s / num_intervals`
			`s_per_100_steps_10_nodes = av_interval_time / (num_nodes / 10)`

			`return total_s, s_per_step, s_per_100_steps_10_nodes`

#2628: temp commit 2024-05-31 13:47:02 +01:00			`def generate_learn_metadata_dict(self) -> Dict[str, Any]:`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`"""Metadata specific to the learning session."""`
			`total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations()`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`self.gym_env.average_reward_per_episode.pop(0) # remove episode 0`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`return {`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`"total_episodes": self.gym_env.episode_counter,`
			`"total_time_steps": self.gym_env.total_time_steps,`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`"total_s": total_s,`
			`"s_per_step": s_per_step,`
			`"s_per_100_steps_10_nodes": s_per_100_steps_10_nodes,`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`"av_reward_per_episode": self.gym_env.average_reward_per_episode,`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00			`}`


#2628: committing to be reviewed 2024-06-01 13:23:27 +01:00			`def _get_benchmark_primaite_environment() -> BenchmarkPrimaiteGymEnv:`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`"""`
			`Create an instance of the BenchmarkPrimaiteGymEnv.`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: temp commit 2024-05-31 13:47:02 +01:00			`This environment will be used to train the agents on.`
			`"""`
#2628: commit changes to combined av per episode 2024-05-31 15:20:10 +01:00			`env = BenchmarkPrimaiteGymEnv(env_config=data_manipulation_config_path())`
			`return env`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00

#2628: temp commit 2024-05-31 13:47:02 +01:00			`def _prepare_session_directory():`
			`"""Prepare the session directory so that it is easier to clean up after the benchmarking is done."""`
			`# override session path`
			`session_path = _BENCHMARK_ROOT / "sessions"`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: temp commit 2024-05-31 13:47:02 +01:00			`if session_path.is_dir():`
			`shutil.rmtree(session_path)`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: temp commit 2024-05-31 13:47:02 +01:00			`primaite.PRIMAITE_PATHS.user_sessions_path = session_path`
			`primaite.PRIMAITE_PATHS.user_sessions_path.mkdir(exist_ok=True, parents=True)`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00

#2628: commit changes to combined av per episode 2024-05-31 15:20:10 +01:00			`def run(`
#2628: commit 2024-06-05 11:03:39 +01:00			`number_of_sessions: int = 5,`
			`num_episodes: int = 512,`
#2628: committing to be reviewed 2024-06-01 13:23:27 +01:00			`num_timesteps: int = 128,`
#2628: commit 2024-06-05 11:03:39 +01:00			`batch_size: int = 128,`
#2628: committing to be reviewed 2024-06-01 13:23:27 +01:00			`learning_rate: float = 3e-4,`
#2628: commit changes to combined av per episode 2024-05-31 15:20:10 +01:00			`) -> None: # 10 # 1000 # 256`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`"""Run the PrimAITE benchmark."""`
			`benchmark_start_time = datetime.now()`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: temp commit 2024-05-31 13:47:02 +01:00			`session_metadata_dict = {}`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: temp commit 2024-05-31 13:47:02 +01:00			`_prepare_session_directory()`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00
#2628: temp commit 2024-05-31 13:47:02 +01:00			`# run training`
			`for i in range(1, number_of_sessions + 1):`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00			`print(f"Starting Benchmark Session: {i}")`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00
#2628: committing to be reviewed 2024-06-01 13:23:27 +01:00			`with _get_benchmark_primaite_environment() as gym_env:`
			`session = BenchmarkSession(`
			`gym_env=gym_env,`
			`num_episodes=num_episodes,`
			`num_steps=num_timesteps,`
			`batch_size=batch_size,`
			`learning_rate=learning_rate,`
			`)`
#2628: temp commit 2024-05-31 13:47:02 +01:00			`session.train()`
			`session_metadata_dict[i] = session.session_metadata`

			`# generate report`
			`build_benchmark_latex_report(`
			`benchmark_start_time=benchmark_start_time,`
			`session_metadata=session_metadata_dict,`
			`config_path=data_manipulation_config_path(),`
			`results_root_path=_RESULTS_ROOT,`
#1632 - Added full benchmarking script that included plots and a LaTeX report. Ran the v2.0.0rc1 benchmark. Tidied a few other things up. 2023-07-20 08:48:18 +01:00			`)`
#1632 - Added bench marking script 2023-07-18 10:11:01 +01:00

			`if __name__ == "__main__":`
			`run()`