From 5f02846879ed9a6c8b1160ed6b21e57b664daba5 Mon Sep 17 00:00:00 2001 From: Czar Echavez Date: Fri, 31 May 2024 15:20:10 +0100 Subject: [PATCH] #2628: commit changes to combined av per episode --- benchmark/primaite_benchmark.py | 14 +++++++++----- benchmark/utils/report.py | 31 +++++++++++++++++-------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/benchmark/primaite_benchmark.py b/benchmark/primaite_benchmark.py index c68d5a68..2f8b2b51 100644 --- a/benchmark/primaite_benchmark.py +++ b/benchmark/primaite_benchmark.py @@ -62,7 +62,7 @@ class BenchmarkSession: batch_size=self.batch_size, n_steps=self.batch_size * self.num_episodes, ) - model.learn(total_timesteps=self.batch_size * self.num_episodes) + model.learn(total_timesteps=self.num_episodes * self.gym_env.game.options.max_episode_length) # end timer for session self.end_time = datetime.now() @@ -108,13 +108,15 @@ class BenchmarkSession: } -def _get_benchmark_primaite_environment() -> BenchmarkPrimaiteGymEnv: +def _get_benchmark_primaite_environment(num_timesteps: int) -> BenchmarkPrimaiteGymEnv: """ Create an instance of the BenchmarkPrimaiteGymEnv. This environment will be used to train the agents on. """ - return BenchmarkPrimaiteGymEnv(env_config=data_manipulation_config_path()) + env = BenchmarkPrimaiteGymEnv(env_config=data_manipulation_config_path()) + env.game.options.max_episode_length = num_timesteps + return env def _prepare_session_directory(): @@ -129,7 +131,9 @@ def _prepare_session_directory(): primaite.PRIMAITE_PATHS.user_sessions_path.mkdir(exist_ok=True, parents=True) -def run(number_of_sessions: int = 1, num_episodes: int = 3, batch_size: int = 128) -> None: # 10 # 1000 # 256 +def run( + number_of_sessions: int = 3, num_episodes: int = 3, num_timesteps: int = 128, batch_size: int = 128 +) -> None: # 10 # 1000 # 256 """Run the PrimAITE benchmark.""" benchmark_start_time = datetime.now() @@ -141,7 +145,7 @@ def run(number_of_sessions: int = 1, num_episodes: int = 3, batch_size: int = 12 for i in range(1, number_of_sessions + 1): print(f"Starting Benchmark Session: {i}") - with _get_benchmark_primaite_environment() as gym_env: + with _get_benchmark_primaite_environment(num_timesteps=num_timesteps) as gym_env: session = BenchmarkSession(gym_env=gym_env, num_episodes=num_episodes, batch_size=batch_size) session.train() session_metadata_dict[i] = session.session_metadata diff --git a/benchmark/utils/report.py b/benchmark/utils/report.py index 0b509d37..b0b0e52a 100644 --- a/benchmark/utils/report.py +++ b/benchmark/utils/report.py @@ -26,30 +26,33 @@ PLOT_CONFIG = { def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict, config: Dict) -> dict: - n = len(metadata_dict) + num_sessions = len(metadata_dict) # number of sessions averaged_data = { "start_timestamp": start_datetime.isoformat(), "end_datetime": datetime.now().isoformat(), "primaite_version": primaite.__version__, "system_info": _get_system_info(), - "total_sessions": n, + "total_sessions": num_sessions, "total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()), "total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()), - "av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / n, - "av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / n, - "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) / n, + "av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / num_sessions, + "av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / num_sessions, + "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) + / num_sessions, "combined_av_reward_per_episode": {}, "session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()}, "config": config, } - episode_averages = [episode["av_reward_per_episode"] for episode in metadata_dict.values()] + # find the average of each episode across all sessions + episodes = metadata_dict[1]["av_reward_per_episode"].keys() - episode = 0 - for episode_average in episode_averages: - episode += 1 - averaged_data["combined_av_reward_per_episode"][str(episode)] = episode_average + for episode in episodes: + combined_av_reward = ( + sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / num_sessions + ) + averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward return averaged_data @@ -205,7 +208,7 @@ def build_benchmark_latex_report( this_version_plot_path = version_result_dir / f"{title}.png" fig.write_image(this_version_plot_path) - fig = _plot_all_benchmarks_combined_session_av() + fig = _plot_all_benchmarks_combined_session_av(results_directory=results_root_path) all_version_plot_path = results_root_path / "PrimAITE Versions Learning Benchmark.png" fig.write_image(all_version_plot_path) @@ -223,8 +226,8 @@ def build_benchmark_latex_report( doc.append(Command("maketitle")) sessions = data["total_sessions"] - episodes = data["training_config"]["num_train_episodes"] - steps = data["training_config"]["num_train_steps"] + episodes = session_metadata[1]["total_episodes"] - 1 + steps = data["config"]["game"]["max_episode_length"] # Body with doc.create(Section("Introduction")): @@ -234,7 +237,7 @@ def build_benchmark_latex_report( ) doc.append( f"\nThe benchmarking process consists of running {sessions} training session using the same " - f"training and lay down config files. Each session trains an agent for {episodes} episodes, " + f"config file. Each session trains an agent for {episodes} episodes, " f"with each episode consisting of {steps} steps." ) doc.append(