Merged PR 437: Automate Benchmarking Process with Scheduled and Release Triggers

## Summary

This pull request introduces an Azure DevOps pipeline that automates the benchmarking process for our project. The pipeline is designed to run under specific conditions and ensures that the benchmarking results are committed and published as artifacts. Additionally, the pipeline generates detailed benchmark reports in markdown format.

**Key Features:**

1.  **Triggering Conditions:**

    -   Triggered on creation of branches matching `refs/heads/release/*`.
    -   Runs automatically at 2 AM every weekday for the `dev` branch.
2.  **Version Handling:**

    -   Updates the version number with a `+dev.YYYYMMDD` suffix for the `dev` branch.
3.  **Dependency Installation:**

    -   Sets up Python 3.11 and installs project dependencies, including development and reinforcement learning extras.
4.  **Benchmark Execution:**

    -   Runs the benchmarking script and stores results in a version-based directory structure.
    -   Generates detailed markdown reports for benchmark results.
5.  **Commit and Push Results:**

    -   Commits and pushes benchmark results for release branches.
6.  **Artifact Preparation and Publishing:**

    -   Packages benchmark results into a tarball and publishes it as a pipeline artifact.

## Checklist
- [X] PR is linked to a **work item**
- [X] **acceptance criteria** of linked ticket are met
- [X] performed **self-review** of the code
- [ ] written **tests** for any new functionality added with this PR
- [ ] updated the **documentation** if this PR changes or adds functionality
- [ ] written/updated **design docs** if this PR implements new functionality
- [ ] updated the **change log**
- [X] ran **pre-commit** checks for code style
- [ ] attended to any **TO-DOs** left in the code

Related work items: #2648
This commit is contained in:
Christopher McCarthy
2024-06-27 12:58:33 +00:00
4 changed files with 149 additions and 91 deletions

View File

@@ -0,0 +1,84 @@
# Azure DevOps pipeline: automated benchmarking on release-branch creation
# and on a weekday schedule for dev.
trigger:
  branches:
    exclude:
      - '*'
    include:
      - 'refs/heads/release/*'

schedules:
  - cron: "0 2 * * 1-5"  # Run at 2 AM every weekday
    displayName: "Weekday Schedule"
    branches:
      include:
        - 'refs/heads/dev'

pool:
  vmImage: ubuntu-latest

variables:
  VERSION: ''
  MAJOR_VERSION: ''

steps:
  # Keep credentials after checkout so the push step below can authenticate.
  - checkout: self
    persistCredentials: true

  # For dev builds, suffix the version with +dev.YYYYMMDD so scheduled dev
  # benchmark results are distinguishable from release results.
  - script: |
      VERSION=$(cat src/primaite/VERSION | tr -d '\n')
      if [[ "$(Build.SourceBranch)" == "refs/heads/dev" ]]; then
        DATE=$(date +%Y%m%d)
        echo "${VERSION}+dev.${DATE}" > src/primaite/VERSION
      fi
    displayName: 'Update VERSION file for Dev Benchmark'

  # Publish VERSION and MAJOR_VERSION as pipeline variables for later steps.
  - script: |
      VERSION=$(cat src/primaite/VERSION | tr -d '\n')
      MAJOR_VERSION=$(echo $VERSION | cut -d. -f1)
      echo "##vso[task.setvariable variable=VERSION]$VERSION"
      echo "##vso[task.setvariable variable=MAJOR_VERSION]$MAJOR_VERSION"
    displayName: 'Set Version Variables'

  - task: UsePythonVersion@0
    inputs:
      versionSpec: '3.11'
      addToPath: true

  - script: |
      python -m pip install --upgrade pip
      pip install -e .[dev,rl]
      primaite setup
    displayName: 'Install Dependencies'

  - script: |
      cd benchmark
      python3 primaite_benchmark.py
      cd ..
    displayName: 'Run Benchmarking Script'

  # FIX: this step previously also required Build.Reason == 'Manual', while
  # the commit/push step below only requires a release branch. A
  # trigger-created release build would then reach the commit step with no
  # git identity configured and fail. The conditions are now aligned.
  - script: |
      git config --global user.email "oss@dstl.gov.uk"
      git config --global user.name "Defence Science and Technology Laboratory UK"
    workingDirectory: $(System.DefaultWorkingDirectory)
    displayName: 'Configure Git'
    condition: and(succeeded(), startsWith(variables['Build.SourceBranch'], 'refs/heads/release'))

  # Commit the versioned benchmark results back to the release branch.
  - script: |
      git add benchmark/results/v$(MAJOR_VERSION)/v$(VERSION)/*
      git commit -m "Automated benchmark output commit for version $(VERSION)"
      git push origin HEAD:refs/heads/$(Build.SourceBranchName)
    displayName: 'Commit and Push Benchmark Results'
    workingDirectory: $(System.DefaultWorkingDirectory)
    env:
      GIT_CREDENTIALS: $(System.AccessToken)
    condition: and(succeeded(), startsWith(variables['Build.SourceBranch'], 'refs/heads/release'))

  - script: |
      tar czf primaite_v$(VERSION)_benchmark.tar.gz benchmark/results/v$(MAJOR_VERSION)/v$(VERSION)
    displayName: 'Prepare Artifacts for Publishing'

  - task: PublishPipelineArtifact@1
    inputs:
      targetPath: primaite_v$(VERSION)_benchmark.tar.gz
      artifactName: 'benchmark-output'
      publishLocation: 'pipeline'
    displayName: 'Publish Benchmark Output as Artifact'

View File

@@ -117,14 +117,14 @@ class BenchmarkSession:
def generate_learn_metadata_dict(self) -> Dict[str, Any]: def generate_learn_metadata_dict(self) -> Dict[str, Any]:
"""Metadata specific to the learning session.""" """Metadata specific to the learning session."""
total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations() total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations()
self.gym_env.average_reward_per_episode.pop(0) # remove episode 0 self.gym_env.total_reward_per_episode.pop(0) # remove episode 0
return { return {
"total_episodes": self.gym_env.episode_counter, "total_episodes": self.gym_env.episode_counter,
"total_time_steps": self.gym_env.total_time_steps, "total_time_steps": self.gym_env.total_time_steps,
"total_s": total_s, "total_s": total_s,
"s_per_step": s_per_step, "s_per_step": s_per_step,
"s_per_100_steps_10_nodes": s_per_100_steps_10_nodes, "s_per_100_steps_10_nodes": s_per_100_steps_10_nodes,
"av_reward_per_episode": self.gym_env.average_reward_per_episode, "total_reward_per_episode": self.gym_env.total_reward_per_episode,
} }

View File

@@ -9,10 +9,6 @@ import plotly.graph_objects as go
import polars as pl import polars as pl
import yaml import yaml
from plotly.graph_objs import Figure from plotly.graph_objs import Figure
from pylatex import Command, Document
from pylatex import Figure as LatexFigure
from pylatex import Section, Subsection, Tabular
from pylatex.utils import bold
from utils import _get_system_info from utils import _get_system_info
import primaite import primaite
@@ -39,19 +35,19 @@ def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict,
"av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / num_sessions, "av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / num_sessions,
"av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values()) "av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values())
/ num_sessions, / num_sessions,
"combined_av_reward_per_episode": {}, "combined_total_reward_per_episode": {},
"session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()}, "session_total_reward_per_episode": {k: v["total_reward_per_episode"] for k, v in metadata_dict.items()},
"config": config, "config": config,
} }
# find the average of each episode across all sessions # find the average of each episode across all sessions
episodes = metadata_dict[1]["av_reward_per_episode"].keys() episodes = metadata_dict[1]["total_reward_per_episode"].keys()
for episode in episodes: for episode in episodes:
combined_av_reward = ( combined_av_reward = (
sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / num_sessions sum(metadata_dict[k]["total_reward_per_episode"][episode] for k in metadata_dict.keys()) / num_sessions
) )
averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward averaged_data["combined_total_reward_per_episode"][episode] = combined_av_reward
return averaged_data return averaged_data
@@ -87,7 +83,7 @@ def _plot_benchmark_metadata(
fig = go.Figure(layout=layout) fig = go.Figure(layout=layout)
fig.update_layout(template=PLOT_CONFIG["template"]) fig.update_layout(template=PLOT_CONFIG["template"])
for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items(): for session, av_reward_dict in benchmark_metadata_dict["session_total_reward_per_episode"].items():
df = _get_df_from_episode_av_reward_dict(av_reward_dict) df = _get_df_from_episode_av_reward_dict(av_reward_dict)
fig.add_trace( fig.add_trace(
go.Scatter( go.Scatter(
@@ -100,7 +96,7 @@ def _plot_benchmark_metadata(
) )
) )
df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"]) df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_total_reward_per_episode"])
fig.add_trace( fig.add_trace(
go.Scatter( go.Scatter(
x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"} x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"}
@@ -136,11 +132,11 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
Does this by iterating over the ``benchmark/results`` directory and Does this by iterating over the ``benchmark/results`` directory and
extracting the benchmark metadata json for each version that has been extracting the benchmark metadata json for each version that has been
benchmarked. The combined_av_reward_per_episode is extracted from each, benchmarked. The combined_total_reward_per_episode is extracted from each,
converted into a polars dataframe, and plotted as a scatter line in plotly. converted into a polars dataframe, and plotted as a scatter line in plotly.
""" """
major_v = primaite.__version__.split(".")[0] major_v = primaite.__version__.split(".")[0]
title = f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*" title = f"Learning Benchmarking of All Released Versions under Major v{major_v}.#.#"
subtitle = "Rolling Av (Combined Session Av)" subtitle = "Rolling Av (Combined Session Av)"
if title: if title:
if subtitle: if subtitle:
@@ -162,7 +158,7 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
metadata_file = dir / f"{dir.name}_benchmark_metadata.json" metadata_file = dir / f"{dir.name}_benchmark_metadata.json"
with open(metadata_file, "r") as file: with open(metadata_file, "r") as file:
metadata_dict = json.load(file) metadata_dict = json.load(file)
df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"]) df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_total_reward_per_episode"])
fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name)) fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name))
@@ -208,98 +204,77 @@ def build_benchmark_latex_report(
fig = _plot_all_benchmarks_combined_session_av(results_directory=results_root_path) fig = _plot_all_benchmarks_combined_session_av(results_directory=results_root_path)
all_version_plot_path = results_root_path / "PrimAITE Versions Learning Benchmark.png" all_version_plot_path = version_result_dir / "PrimAITE Versions Learning Benchmark.png"
fig.write_image(all_version_plot_path) fig.write_image(all_version_plot_path)
geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"}
data = benchmark_metadata_dict data = benchmark_metadata_dict
primaite_version = data["primaite_version"] primaite_version = data["primaite_version"]
# Create a new document with open(version_result_dir / f"PrimAITE v{primaite_version} Learning Benchmark.md", "w") as file:
doc = Document("report", geometry_options=geometry_options) # Title
# Title file.write(f"# PrimAITE v{primaite_version} Learning Benchmark\n")
doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark")) file.write("## PrimAITE Dev Team\n")
doc.preamble.append(Command("author", "PrimAITE Dev Team")) file.write(f"### {datetime.now().date()}\n")
doc.preamble.append(Command("date", datetime.now().date())) file.write("\n---\n")
doc.append(Command("maketitle"))
sessions = data["total_sessions"] sessions = data["total_sessions"]
episodes = session_metadata[1]["total_episodes"] - 1 episodes = session_metadata[1]["total_episodes"] - 1
steps = data["config"]["game"]["max_episode_length"] steps = data["config"]["game"]["max_episode_length"]
# Body # Body
with doc.create(Section("Introduction")): file.write("## 1 Introduction\n")
doc.append( file.write(
f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics " f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics "
f"were captured to be referenced during system-level testing and user acceptance testing (UAT)." f"were captured to be referenced during system-level testing and user acceptance testing (UAT).\n"
) )
doc.append( file.write(
f"\nThe benchmarking process consists of running {sessions} training session using the same " f"The benchmarking process consists of running {sessions} training session using the same "
f"config file. Each session trains an agent for {episodes} episodes, " f"config file. Each session trains an agent for {episodes} episodes, "
f"with each episode consisting of {steps} steps." f"with each episode consisting of {steps} steps.\n"
) )
doc.append( file.write(
f"\nThe total reward per episode from each session is captured. This is then used to calculate an " f"The total reward per episode from each session is captured. This is then used to calculate an "
f"caverage total reward per episode from the {sessions} individual sessions for smoothing. " f"caverage total reward per episode from the {sessions} individual sessions for smoothing. "
f"Finally, a 25-widow rolling average of the average total reward per session is calculated for " f"Finally, a 25-widow rolling average of the average total reward per session is calculated for "
f"further smoothing." f"further smoothing.\n"
) )
with doc.create(Section("System Information")): file.write("## 2 System Information\n")
with doc.create(Subsection("Python")): i = 1
with doc.create(Tabular("|l|l|")) as table: file.write(f"### 2.{i} Python\n")
table.add_hline() file.write(f"**Version:** {sys.version}\n")
table.add_row((bold("Version"), sys.version))
table.add_hline()
for section, section_data in data["system_info"].items(): for section, section_data in data["system_info"].items():
i += 1
if section_data: if section_data:
with doc.create(Subsection(section)): file.write(f"### 2.{i} {section}\n")
if isinstance(section_data, dict): if isinstance(section_data, dict):
with doc.create(Tabular("|l|l|")) as table: for key, value in section_data.items():
table.add_hline() file.write(f"- **{key}:** {value}\n")
for key, value in section_data.items():
table.add_row((bold(key), value))
table.add_hline()
elif isinstance(section_data, list):
headers = section_data[0].keys()
tabs_str = "|".join(["l" for _ in range(len(headers))])
tabs_str = f"|{tabs_str}|"
with doc.create(Tabular(tabs_str)) as table:
table.add_hline()
table.add_row([bold(h) for h in headers])
table.add_hline()
for item in section_data:
table.add_row(item.values())
table.add_hline()
headers_map = { headers_map = {
"total_sessions": "Total Sessions", "total_sessions": "Total Sessions",
"total_episodes": "Total Episodes", "total_episodes": "Total Episodes",
"total_time_steps": "Total Steps", "total_time_steps": "Total Steps",
"av_s_per_session": "Av Session Duration (s)", "av_s_per_session": "Av Session Duration (s)",
"av_s_per_step": "Av Step Duration (s)", "av_s_per_step": "Av Step Duration (s)",
"av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)", "av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
} }
with doc.create(Section("Stats")):
with doc.create(Subsection("Benchmark Results")):
with doc.create(Tabular("|l|l|")) as table:
table.add_hline()
for section, header in headers_map.items():
if section.startswith("av_"):
table.add_row((bold(header), f"{data[section]:.4f}"))
else:
table.add_row((bold(header), data[section]))
table.add_hline()
with doc.create(Section("Graphs")): file.write("## 3 Stats\n")
with doc.create(Subsection(f"v{primaite_version} Learning Benchmark Plot")): for section, header in headers_map.items():
with doc.create(LatexFigure(position="h!")) as pic: if section.startswith("av_"):
pic.add_image(str(this_version_plot_path)) file.write(f"- **{header}:** {data[section]:.4f}\n")
pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot") else:
file.write(f"- **{header}:** {data[section]}\n")
with doc.create(Subsection(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")): file.write("## 4 Graphs\n")
with doc.create(LatexFigure(position="h!")) as pic:
pic.add_image(str(all_version_plot_path))
pic.add_caption(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")
doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True) file.write(f"### 4.1 v{primaite_version} Learning Benchmark Plot\n")
file.write(f"![PrimAITE {primaite_version} Learning Benchmark Plot]({this_version_plot_path.name})\n")
file.write(f"### 4.2 Learning Benchmarking of All Released Versions under Major v{major_v}.#.#\n")
file.write(
f"![Learning Benchmarking of All Released Versions under "
f"Major v{major_v}.#.#]({all_version_plot_path.name})\n"
)

View File

@@ -64,7 +64,6 @@ dev = [
"gputil==1.4.0", "gputil==1.4.0",
"pip-licenses==4.3.0", "pip-licenses==4.3.0",
"pre-commit==2.20.0", "pre-commit==2.20.0",
"pylatex==1.4.1",
"pytest==7.2.0", "pytest==7.2.0",
"pytest-xdist==3.3.1", "pytest-xdist==3.3.1",
"pytest-cov==4.0.0", "pytest-cov==4.0.0",