Merge remote-tracking branch 'origin/dev' into feature/2641-prevent-databaseservice-interactions-from-occurring-in-fixing-state
This commit is contained in:
84
.azure/azure-benchmark-pipeline.yaml
Normal file
84
.azure/azure-benchmark-pipeline.yaml
Normal file
@@ -0,0 +1,84 @@
|
||||
trigger:
|
||||
branches:
|
||||
exclude:
|
||||
- '*'
|
||||
include:
|
||||
- 'refs/heads/release/*'
|
||||
|
||||
schedules:
|
||||
- cron: "0 2 * * 1-5" # Run at 2 AM every weekday
|
||||
displayName: "Weekday Schedule"
|
||||
branches:
|
||||
include:
|
||||
- 'refs/heads/dev'
|
||||
|
||||
pool:
|
||||
vmImage: ubuntu-latest
|
||||
|
||||
variables:
|
||||
VERSION: ''
|
||||
MAJOR_VERSION: ''
|
||||
|
||||
steps:
|
||||
- checkout: self
|
||||
persistCredentials: true
|
||||
|
||||
- script: |
|
||||
VERSION=$(cat src/primaite/VERSION | tr -d '\n')
|
||||
if [[ "$(Build.SourceBranch)" == "refs/heads/dev" ]]; then
|
||||
DATE=$(date +%Y%m%d)
|
||||
echo "${VERSION}+dev.${DATE}" > src/primaite/VERSION
|
||||
fi
|
||||
displayName: 'Update VERSION file for Dev Benchmark'
|
||||
|
||||
- script: |
|
||||
VERSION=$(cat src/primaite/VERSION | tr -d '\n')
|
||||
MAJOR_VERSION=$(echo $VERSION | cut -d. -f1)
|
||||
echo "##vso[task.setvariable variable=VERSION]$VERSION"
|
||||
echo "##vso[task.setvariable variable=MAJOR_VERSION]$MAJOR_VERSION"
|
||||
displayName: 'Set Version Variables'
|
||||
|
||||
- task: UsePythonVersion@0
|
||||
inputs:
|
||||
versionSpec: '3.11'
|
||||
addToPath: true
|
||||
|
||||
- script: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -e .[dev,rl]
|
||||
primaite setup
|
||||
displayName: 'Install Dependencies'
|
||||
|
||||
- script: |
|
||||
cd benchmark
|
||||
python3 primaite_benchmark.py
|
||||
cd ..
|
||||
displayName: 'Run Benchmarking Script'
|
||||
|
||||
- script: |
|
||||
git config --global user.email "oss@dstl.gov.uk"
|
||||
git config --global user.name "Defence Science and Technology Laboratory UK"
|
||||
workingDirectory: $(System.DefaultWorkingDirectory)
|
||||
displayName: 'Configure Git'
|
||||
condition: and(succeeded(), eq(variables['Build.Reason'], 'Manual'), startsWith(variables['Build.SourceBranch'], 'refs/heads/release'))
|
||||
|
||||
- script: |
|
||||
git add benchmark/results/v$(MAJOR_VERSION)/v$(VERSION)/*
|
||||
git commit -m "Automated benchmark output commit for version $(VERSION)"
|
||||
git push origin HEAD:refs/heads/$(Build.SourceBranchName)
|
||||
displayName: 'Commit and Push Benchmark Results'
|
||||
workingDirectory: $(System.DefaultWorkingDirectory)
|
||||
env:
|
||||
GIT_CREDENTIALS: $(System.AccessToken)
|
||||
condition: and(succeeded(), startsWith(variables['Build.SourceBranch'], 'refs/heads/release'))
|
||||
|
||||
- script: |
|
||||
tar czf primaite_v$(VERSION)_benchmark.tar.gz benchmark/results/v$(MAJOR_VERSION)/v$(VERSION)
|
||||
displayName: 'Prepare Artifacts for Publishing'
|
||||
|
||||
- task: PublishPipelineArtifact@1
|
||||
inputs:
|
||||
targetPath: primaite_v$(VERSION)_benchmark.tar.gz
|
||||
artifactName: 'benchmark-output'
|
||||
publishLocation: 'pipeline'
|
||||
displayName: 'Publish Benchmark Output as Artifact'
|
||||
@@ -117,14 +117,14 @@ class BenchmarkSession:
|
||||
def generate_learn_metadata_dict(self) -> Dict[str, Any]:
|
||||
"""Metadata specific to the learning session."""
|
||||
total_s, s_per_step, s_per_100_steps_10_nodes = self._learn_benchmark_durations()
|
||||
self.gym_env.average_reward_per_episode.pop(0) # remove episode 0
|
||||
self.gym_env.total_reward_per_episode.pop(0) # remove episode 0
|
||||
return {
|
||||
"total_episodes": self.gym_env.episode_counter,
|
||||
"total_time_steps": self.gym_env.total_time_steps,
|
||||
"total_s": total_s,
|
||||
"s_per_step": s_per_step,
|
||||
"s_per_100_steps_10_nodes": s_per_100_steps_10_nodes,
|
||||
"av_reward_per_episode": self.gym_env.average_reward_per_episode,
|
||||
"total_reward_per_episode": self.gym_env.total_reward_per_episode,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -9,10 +9,6 @@ import plotly.graph_objects as go
|
||||
import polars as pl
|
||||
import yaml
|
||||
from plotly.graph_objs import Figure
|
||||
from pylatex import Command, Document
|
||||
from pylatex import Figure as LatexFigure
|
||||
from pylatex import Section, Subsection, Tabular
|
||||
from pylatex.utils import bold
|
||||
from utils import _get_system_info
|
||||
|
||||
import primaite
|
||||
@@ -39,19 +35,19 @@ def _build_benchmark_results_dict(start_datetime: datetime, metadata_dict: Dict,
|
||||
"av_s_per_step": sum(d["s_per_step"] for d in metadata_dict.values()) / num_sessions,
|
||||
"av_s_per_100_steps_10_nodes": sum(d["s_per_100_steps_10_nodes"] for d in metadata_dict.values())
|
||||
/ num_sessions,
|
||||
"combined_av_reward_per_episode": {},
|
||||
"session_av_reward_per_episode": {k: v["av_reward_per_episode"] for k, v in metadata_dict.items()},
|
||||
"combined_total_reward_per_episode": {},
|
||||
"session_total_reward_per_episode": {k: v["total_reward_per_episode"] for k, v in metadata_dict.items()},
|
||||
"config": config,
|
||||
}
|
||||
|
||||
# find the average of each episode across all sessions
|
||||
episodes = metadata_dict[1]["av_reward_per_episode"].keys()
|
||||
episodes = metadata_dict[1]["total_reward_per_episode"].keys()
|
||||
|
||||
for episode in episodes:
|
||||
combined_av_reward = (
|
||||
sum(metadata_dict[k]["av_reward_per_episode"][episode] for k in metadata_dict.keys()) / num_sessions
|
||||
sum(metadata_dict[k]["total_reward_per_episode"][episode] for k in metadata_dict.keys()) / num_sessions
|
||||
)
|
||||
averaged_data["combined_av_reward_per_episode"][episode] = combined_av_reward
|
||||
averaged_data["combined_total_reward_per_episode"][episode] = combined_av_reward
|
||||
|
||||
return averaged_data
|
||||
|
||||
@@ -87,7 +83,7 @@ def _plot_benchmark_metadata(
|
||||
fig = go.Figure(layout=layout)
|
||||
fig.update_layout(template=PLOT_CONFIG["template"])
|
||||
|
||||
for session, av_reward_dict in benchmark_metadata_dict["session_av_reward_per_episode"].items():
|
||||
for session, av_reward_dict in benchmark_metadata_dict["session_total_reward_per_episode"].items():
|
||||
df = _get_df_from_episode_av_reward_dict(av_reward_dict)
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
@@ -100,7 +96,7 @@ def _plot_benchmark_metadata(
|
||||
)
|
||||
)
|
||||
|
||||
df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_av_reward_per_episode"])
|
||||
df = _get_df_from_episode_av_reward_dict(benchmark_metadata_dict["combined_total_reward_per_episode"])
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=df["episode"], y=df["av_reward"], mode="lines", name="Combined Session Av", line={"color": "#FF0000"}
|
||||
@@ -136,11 +132,11 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
|
||||
|
||||
Does this by iterating over the ``benchmark/results`` directory and
|
||||
extracting the benchmark metadata json for each version that has been
|
||||
benchmarked. The combined_av_reward_per_episode is extracted from each,
|
||||
benchmarked. The combined_total_reward_per_episode is extracted from each,
|
||||
converted into a polars dataframe, and plotted as a scatter line in plotly.
|
||||
"""
|
||||
major_v = primaite.__version__.split(".")[0]
|
||||
title = f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*"
|
||||
title = f"Learning Benchmarking of All Released Versions under Major v{major_v}.#.#"
|
||||
subtitle = "Rolling Av (Combined Session Av)"
|
||||
if title:
|
||||
if subtitle:
|
||||
@@ -162,7 +158,7 @@ def _plot_all_benchmarks_combined_session_av(results_directory: Path) -> Figure:
|
||||
metadata_file = dir / f"{dir.name}_benchmark_metadata.json"
|
||||
with open(metadata_file, "r") as file:
|
||||
metadata_dict = json.load(file)
|
||||
df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_av_reward_per_episode"])
|
||||
df = _get_df_from_episode_av_reward_dict(metadata_dict["combined_total_reward_per_episode"])
|
||||
|
||||
fig.add_trace(go.Scatter(x=df["episode"], y=df["rolling_av_reward"], mode="lines", name=dir.name))
|
||||
|
||||
@@ -208,98 +204,77 @@ def build_benchmark_latex_report(
|
||||
|
||||
fig = _plot_all_benchmarks_combined_session_av(results_directory=results_root_path)
|
||||
|
||||
all_version_plot_path = results_root_path / "PrimAITE Versions Learning Benchmark.png"
|
||||
all_version_plot_path = version_result_dir / "PrimAITE Versions Learning Benchmark.png"
|
||||
fig.write_image(all_version_plot_path)
|
||||
|
||||
geometry_options = {"tmargin": "2.5cm", "rmargin": "2.5cm", "bmargin": "2.5cm", "lmargin": "2.5cm"}
|
||||
data = benchmark_metadata_dict
|
||||
primaite_version = data["primaite_version"]
|
||||
|
||||
# Create a new document
|
||||
doc = Document("report", geometry_options=geometry_options)
|
||||
# Title
|
||||
doc.preamble.append(Command("title", f"PrimAITE {primaite_version} Learning Benchmark"))
|
||||
doc.preamble.append(Command("author", "PrimAITE Dev Team"))
|
||||
doc.preamble.append(Command("date", datetime.now().date()))
|
||||
doc.append(Command("maketitle"))
|
||||
with open(version_result_dir / f"PrimAITE v{primaite_version} Learning Benchmark.md", "w") as file:
|
||||
# Title
|
||||
file.write(f"# PrimAITE v{primaite_version} Learning Benchmark\n")
|
||||
file.write("## PrimAITE Dev Team\n")
|
||||
file.write(f"### {datetime.now().date()}\n")
|
||||
file.write("\n---\n")
|
||||
|
||||
sessions = data["total_sessions"]
|
||||
episodes = session_metadata[1]["total_episodes"] - 1
|
||||
steps = data["config"]["game"]["max_episode_length"]
|
||||
sessions = data["total_sessions"]
|
||||
episodes = session_metadata[1]["total_episodes"] - 1
|
||||
steps = data["config"]["game"]["max_episode_length"]
|
||||
|
||||
# Body
|
||||
with doc.create(Section("Introduction")):
|
||||
doc.append(
|
||||
# Body
|
||||
file.write("## 1 Introduction\n")
|
||||
file.write(
|
||||
f"PrimAITE v{primaite_version} was benchmarked automatically upon release. Learning rate metrics "
|
||||
f"were captured to be referenced during system-level testing and user acceptance testing (UAT)."
|
||||
f"were captured to be referenced during system-level testing and user acceptance testing (UAT).\n"
|
||||
)
|
||||
doc.append(
|
||||
f"\nThe benchmarking process consists of running {sessions} training session using the same "
|
||||
file.write(
|
||||
f"The benchmarking process consists of running {sessions} training session using the same "
|
||||
f"config file. Each session trains an agent for {episodes} episodes, "
|
||||
f"with each episode consisting of {steps} steps."
|
||||
f"with each episode consisting of {steps} steps.\n"
|
||||
)
|
||||
doc.append(
|
||||
f"\nThe total reward per episode from each session is captured. This is then used to calculate an "
|
||||
file.write(
|
||||
f"The total reward per episode from each session is captured. This is then used to calculate an "
|
||||
f"caverage total reward per episode from the {sessions} individual sessions for smoothing. "
|
||||
f"Finally, a 25-widow rolling average of the average total reward per session is calculated for "
|
||||
f"further smoothing."
|
||||
f"further smoothing.\n"
|
||||
)
|
||||
|
||||
with doc.create(Section("System Information")):
|
||||
with doc.create(Subsection("Python")):
|
||||
with doc.create(Tabular("|l|l|")) as table:
|
||||
table.add_hline()
|
||||
table.add_row((bold("Version"), sys.version))
|
||||
table.add_hline()
|
||||
file.write("## 2 System Information\n")
|
||||
i = 1
|
||||
file.write(f"### 2.{i} Python\n")
|
||||
file.write(f"**Version:** {sys.version}\n")
|
||||
|
||||
for section, section_data in data["system_info"].items():
|
||||
i += 1
|
||||
if section_data:
|
||||
with doc.create(Subsection(section)):
|
||||
if isinstance(section_data, dict):
|
||||
with doc.create(Tabular("|l|l|")) as table:
|
||||
table.add_hline()
|
||||
for key, value in section_data.items():
|
||||
table.add_row((bold(key), value))
|
||||
table.add_hline()
|
||||
elif isinstance(section_data, list):
|
||||
headers = section_data[0].keys()
|
||||
tabs_str = "|".join(["l" for _ in range(len(headers))])
|
||||
tabs_str = f"|{tabs_str}|"
|
||||
with doc.create(Tabular(tabs_str)) as table:
|
||||
table.add_hline()
|
||||
table.add_row([bold(h) for h in headers])
|
||||
table.add_hline()
|
||||
for item in section_data:
|
||||
table.add_row(item.values())
|
||||
table.add_hline()
|
||||
file.write(f"### 2.{i} {section}\n")
|
||||
if isinstance(section_data, dict):
|
||||
for key, value in section_data.items():
|
||||
file.write(f"- **{key}:** {value}\n")
|
||||
|
||||
headers_map = {
|
||||
"total_sessions": "Total Sessions",
|
||||
"total_episodes": "Total Episodes",
|
||||
"total_time_steps": "Total Steps",
|
||||
"av_s_per_session": "Av Session Duration (s)",
|
||||
"av_s_per_step": "Av Step Duration (s)",
|
||||
"av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
|
||||
}
|
||||
with doc.create(Section("Stats")):
|
||||
with doc.create(Subsection("Benchmark Results")):
|
||||
with doc.create(Tabular("|l|l|")) as table:
|
||||
table.add_hline()
|
||||
for section, header in headers_map.items():
|
||||
if section.startswith("av_"):
|
||||
table.add_row((bold(header), f"{data[section]:.4f}"))
|
||||
else:
|
||||
table.add_row((bold(header), data[section]))
|
||||
table.add_hline()
|
||||
headers_map = {
|
||||
"total_sessions": "Total Sessions",
|
||||
"total_episodes": "Total Episodes",
|
||||
"total_time_steps": "Total Steps",
|
||||
"av_s_per_session": "Av Session Duration (s)",
|
||||
"av_s_per_step": "Av Step Duration (s)",
|
||||
"av_s_per_100_steps_10_nodes": "Av Duration per 100 Steps per 10 Nodes (s)",
|
||||
}
|
||||
|
||||
with doc.create(Section("Graphs")):
|
||||
with doc.create(Subsection(f"v{primaite_version} Learning Benchmark Plot")):
|
||||
with doc.create(LatexFigure(position="h!")) as pic:
|
||||
pic.add_image(str(this_version_plot_path))
|
||||
pic.add_caption(f"PrimAITE {primaite_version} Learning Benchmark Plot")
|
||||
file.write("## 3 Stats\n")
|
||||
for section, header in headers_map.items():
|
||||
if section.startswith("av_"):
|
||||
file.write(f"- **{header}:** {data[section]:.4f}\n")
|
||||
else:
|
||||
file.write(f"- **{header}:** {data[section]}\n")
|
||||
|
||||
with doc.create(Subsection(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")):
|
||||
with doc.create(LatexFigure(position="h!")) as pic:
|
||||
pic.add_image(str(all_version_plot_path))
|
||||
pic.add_caption(f"Learning Benchmarking of All Released Versions under Major v{major_v}.*.*")
|
||||
file.write("## 4 Graphs\n")
|
||||
|
||||
doc.generate_pdf(str(this_version_plot_path).replace(".png", ""), clean_tex=True)
|
||||
file.write(f"### 4.1 v{primaite_version} Learning Benchmark Plot\n")
|
||||
file.write(f"\n")
|
||||
|
||||
file.write(f"### 4.2 Learning Benchmarking of All Released Versions under Major v{major_v}.#.#\n")
|
||||
file.write(
|
||||
f"\n"
|
||||
)
|
||||
|
||||
@@ -64,7 +64,6 @@ dev = [
|
||||
"gputil==1.4.0",
|
||||
"pip-licenses==4.3.0",
|
||||
"pre-commit==2.20.0",
|
||||
"pylatex==1.4.1",
|
||||
"pytest==7.2.0",
|
||||
"pytest-xdist==3.3.1",
|
||||
"pytest-cov==4.0.0",
|
||||
|
||||
Reference in New Issue
Block a user