#2628: commit changes to combined av per episode

This commit is contained in:
Czar Echavez
2024-05-31 15:20:10 +01:00
parent add8cc0ab1
commit 5f02846879
2 changed files with 26 additions and 19 deletions

View File

@@ -62,7 +62,7 @@ class BenchmarkSession:
batch_size=self.batch_size, batch_size=self.batch_size,
n_steps=self.batch_size * self.num_episodes, n_steps=self.batch_size * self.num_episodes,
) )
model.learn(total_timesteps=self.batch_size * self.num_episodes) model.learn(total_timesteps=self.num_episodes * self.gym_env.game.options.max_episode_length)
# end timer for session # end timer for session
self.end_time = datetime.now() self.end_time = datetime.now()
@@ -108,13 +108,15 @@ class BenchmarkSession:
} }
def _get_benchmark_primaite_environment() -> BenchmarkPrimaiteGymEnv: def _get_benchmark_primaite_environment(num_timesteps: int) -> BenchmarkPrimaiteGymEnv:
""" """
Create an instance of the BenchmarkPrimaiteGymEnv. Create an instance of the BenchmarkPrimaiteGymEnv.
This environment will be used to train the agents on. This environment will be used to train the agents on.
""" """
return BenchmarkPrimaiteGymEnv(env_config=data_manipulation_config_path()) env = BenchmarkPrimaiteGymEnv(env_config=data_manipulation_config_path())
env.game.options.max_episode_length = num_timesteps
return env
def _prepare_session_directory(): def _prepare_session_directory():
@@ -129,7 +131,9 @@ def _prepare_session_directory():
primaite.PRIMAITE_PATHS.user_sessions_path.mkdir(exist_ok=True, parents=True) primaite.PRIMAITE_PATHS.user_sessions_path.mkdir(exist_ok=True, parents=True)
def run(number_of_sessions: int = 1, num_episodes: int = 3, batch_size: int = 128) -> None: # 10 # 1000 # 256 def run(
number_of_sessions: int = 3, num_episodes: int = 3, num_timesteps: int = 128, batch_size: int = 128
) -> None: # 10 # 1000 # 256
"""Run the PrimAITE benchmark.""" """Run the PrimAITE benchmark."""
benchmark_start_time = datetime.now() benchmark_start_time = datetime.now()
@@ -141,7 +145,7 @@ def run(number_of_sessions: int = 1, num_episodes: int = 3, batch_size: int = 12
for i in range(1, number_of_sessions + 1): for i in range(1, number_of_sessions + 1):
print(f"Starting Benchmark Session: {i}") print(f"Starting Benchmark Session: {i}")
with _get_benchmark_primaite_environment() as gym_env: with _get_benchmark_primaite_environment(num_timesteps=num_timesteps) as gym_env:
session = BenchmarkSession(gym_env=gym_env, num_episodes=num_episodes, batch_size=batch_size) session = BenchmarkSession(gym_env=gym_env, num_episodes=num_episodes, batch_size=batch_size)
session.train() session.train()
session_metadata_dict[i] = session.session_metadata session_metadata_dict[i] = session.session_metadata

View File

@@ -26,30 +26,33 @@ PLOT_CONFIG = {
def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict, config: Dict) -> dict: def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict, config: Dict) -> dict:
n = len(metadata_dict) num_sessions = len(metadata_dict) # number of sessions
averaged_data = { averaged_data = {
"start_timestamp": start_datetime.isoformat(), "start_timestamp": start_datetime.isoformat(),
"end_datetime": datetime.now().isoformat(), "end_datetime": datetime.now().isoformat(),
"primaite_version": primaite.__version__, "primaite_version": primaite.__version__,
"system_info": _get_system_info(), "system_info": _get_system_info(),
"total_sessions": n, "total_sessions": num_sessions,
"total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()), "total_episodes": sum(d["total_episodes"] for d in metadata_dict.values()),
"total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()), "total_time_steps": sum(d["total_time_steps"] for d in metadata_dict.values()),
"av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / n, "av_s_per_session": sum(d["total_s"] for d in metadata_dict.values()) / num_sessions,
"av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / n, "av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / num_sessions,
"av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) / n, "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values())
/ num_sessions,
"combined_av_reward_per_episode": {}, "combined_av_reward_per_episode": {},
"session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()}, "session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()},
"config": config, "config": config,
} }
episode_averages = [episode["av_reward_per_episode"] for episode in metadata_dict.values()] # find the average of each episode across all sessions
episodes = metadata_dict[1]["av_reward_per_episode"].keys()
episode = 0 for episode in episodes:
for episode_average in episode_averages: combined_av_reward = (
episode += 1 sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / num_sessions
averaged_data["combined_av_reward_per_episode"][str(episode)] = episode_average )
averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward
return averaged_data return averaged_data
@@ -205,7 +208,7 @@ def build_benchmark_latex_report(
this_version_plot_path = version_result_dir / f"{title}.png" this_version_plot_path = version_result_dir / f"{title}.png"
fig.write_image(this_version_plot_path) fig.write_image(this_version_plot_path)
fig = _plot_all_benchmarks_combined_session_av() fig = _plot_all_benchmarks_combined_session_av(results_directory=results_root_path)
all_version_plot_path = results_root_path / "PrimAITE Versions Learning Benchmark.png" all_version_plot_path = results_root_path / "PrimAITE Versions Learning Benchmark.png"
fig.write_image(all_version_plot_path) fig.write_image(all_version_plot_path)
@@ -223,8 +226,8 @@ def build_benchmark_latex_report(
doc.append(Command("maketitle")) doc.append(Command("maketitle"))
sessions = data["total_sessions"] sessions = data["total_sessions"]
episodes = data["training_config"]["num_train_episodes"] episodes = session_metadata[1]["total_episodes"] - 1
steps = data["training_config"]["num_train_steps"] steps = data["config"]["game"]["max_episode_length"]
# Body # Body
with doc.create(Section("Introduction")): with doc.create(Section("Introduction")):
@@ -234,7 +237,7 @@ def build_benchmark_latex_report(
) )
doc.append( doc.append(
f"\nThe benchmarking process consists of running {sessions} training session using the same " f"\nThe benchmarking process consists of running {sessions} training session using the same "
f"training and lay down config files. Each session trains an agent for {episodes} episodes, " f"config file. Each session trains an agent for {episodes} episodes, "
f"with each episode consisting of {steps} steps." f"with each episode consisting of {steps} steps."
) )
doc.append( doc.append(