Merged PR 130: #1595: load session double run

## Summary
- Fixed the bug where session gets run twice when loading a session via CLI
- Added a test for the CLI run - xskipped while the bugfix for load session acting odd is tbd
- Fixed a minor bug in PrimAITE session where session_path is overwritten

## Test process
Added a new test for CLI, but xskipped while a different bug is tbd

Ran it locally and no longer runs another session after the loaded session
```
(venv) PS D:\Projects\ARCD\PrimAITE\PrimAITE> primaite session --load [REDACTED for security]\primaite\sessions\2023-07-20\2023-07-20_15-01-11
2023-07-20 15:04:21,320: Using: AgentFramework.SB3, AgentIdentifier.PPO, ActionType.NODE, observation_space=NODE_LINK_TABLE, Training: 5 episodes @ 256 stepsEvaluation: 5 episodes @ 256 steps
2023-07-20 15:04:21,335: Environment configuration loaded
Environment configuration loaded
2023-07-20 15:04:21,775: Welcome to the Primary-level AI Training Environment (PrimAITE) (version: 2.0.0rc1)
2023-07-20 15:04:21,775: The output directory for this session is: C:\Users\czar.echavez\primaite\sessions\2023-07-20\2023-07-20_15-04-21
2023-07-20 15:04:21,779: Beginning learning for 10 episodes @ 256 time steps...
2023-07-20 15:04:22,379: Episode: 1, Average Reward: -0.0020839843750000003
2023-07-20 15:04:23,137: Episode: 2, Average Reward: -0.0021933593750000004
2023-07-20 15:04:23,831: Episode: 3, Average Reward: -0.0022617187500000003
2023-07-20 15:04:24,486: Episode: 4, Average Reward: -0.002373046874999999
2023-07-20 15:04:25,125: Episode: 5, Average Reward: -0.0018066406250000014
2023-07-20 15:04:25,791: Episode: 6, Average Reward: -0.0017597656250000013
2023-07-20 15:04:26,415: Episode: 7, Average Reward: -0.0018437500000000014
2023-07-20 15:04:27,053: Episode: 8, Average Reward: -0.0019101562500000015
2023-07-20 15:04:27,715: Episode: 9, Average Reward: -0.0016777343750000013
2023-07-20 15:04:28,359: Episode: 10, Average Reward: -0.0015976562500000012
2023-07-20 15:04:28,550: Finished learning
2023-07-20 15:04:30,851: Beginning deterministic evaluation for 5 episodes @ 256 time steps...
2023-07-20 15:04:31,243: Episode: 1, Average Reward: -0.0018515625000000014
2023-07-20 15:04:31,663: Episode: 2, Average Reward: -0.0018515625000000014
2023-07-20 15:04:32,112: Episode: 3, Average Reward: -0.0018515625000000014
2023-07-20 15:04:32,505: Episode: 4, Average Reward: -0.0018515625000000014
2023-07-20 15:04:32,904: Episode: 5, Average Reward: -0.0018515625000000014
2023-07-20 15:04:32,998: Finished evaluation

```

Also fixed the xskipped tests, since the double running seems to have caused the issue of rewards not matching.

Added a test that runs the PrimAITE in CLI

## Checklist
- [x] This PR is linked to a **work item**
- [x] I have performed **self-review** of the code
- [x] I have written **tests** for any new functionality added with this PR
- [x] I have updated the **documentation** if this PR changes or adds functionality
- [x] I have run **pre-commit** checks for code style

#1595:
- Fixed the...
This commit is contained in:
Czar Echavez
2023-07-21 11:32:32 +00:00
3 changed files with 53 additions and 65 deletions

View File

@@ -173,15 +173,18 @@ def session(tc: Optional[str] = None, ldc: Optional[str] = None, load: Optional[
from primaite.main import run
if load is not None:
# run a loaded session
run(session_path=load)
if not tc:
tc = main_training_config_path()
else:
# start a new session using tc and ldc
if not tc:
tc = main_training_config_path()
if not ldc:
ldc = dos_very_basic_config_path()
if not ldc:
ldc = dos_very_basic_config_path()
run(training_config_path=tc, lay_down_config_path=ldc)
run(training_config_path=tc, lay_down_config_path=ldc)
@app.command()

View File

@@ -72,13 +72,7 @@ class PrimaiteSession:
if not isinstance(lay_down_config_path, Path):
lay_down_config_path = Path(lay_down_config_path)
self._lay_down_config_path: Final[Union[Path, str]] = lay_down_config_path
self._lay_down_config: Dict = lay_down_config.load(self._lay_down_config_path)
self._agent_session: AgentSessionABC = None # noqa
self.session_path: Path = None # noqa
self.timestamp_str: str = None # noqa
self.learning_path: Path = None # noqa
self.evaluation_path: Path = None # noqa
self._lay_down_config: Dict = lay_down_config.load(self._lay_down_config_path) # noqa
def setup(self) -> None:
"""Performs the session setup."""

View File

@@ -6,10 +6,11 @@ from pathlib import Path
from typing import Union
from uuid import uuid4
import pytest
from typer.testing import CliRunner
from primaite import getLogger
from primaite.agents.sb3 import SB3Agent
from primaite.cli import app
from primaite.common.enums import AgentFramework, AgentIdentifier
from primaite.main import run
from primaite.primaite_session import PrimaiteSession
@@ -18,6 +19,24 @@ from tests import TEST_ASSETS_ROOT
_LOGGER = getLogger(__name__)
runner = CliRunner()
sb3_expected_avg_reward_per_episode = {
10: 0.0,
11: -0.0011074218750000008,
12: -0.0010000000000000007,
13: -0.0016601562500000013,
14: -0.001400390625000001,
15: -0.0009863281250000007,
16: -0.0011855468750000008,
17: -0.0009511718750000007,
18: -0.0008789062500000007,
19: -0.0012226562500000009,
20: -0.0010292968750000007,
}
sb3_expected_eval_rewards = -0.0018515625000000014
def copy_session_asset(asset_path: Union[str, Path]) -> str:
"""Copies the asset into a temporary test folder."""
@@ -43,25 +62,8 @@ def copy_session_asset(asset_path: Union[str, Path]) -> str:
return copy_path
@pytest.mark.xfail(
reason="Loading works fine but the exact values change with code changes, a bug report has been created."
)
def test_load_sb3_session():
"""Test that loading an SB3 agent works."""
expected_learn_mean_reward_per_episode = {
10: 0,
11: -0.008037109374999995,
12: -0.007978515624999988,
13: -0.008191406249999991,
14: -0.00817578124999999,
15: -0.008085937499999998,
16: -0.007837890624999982,
17: -0.007798828124999992,
18: -0.007777343749999998,
19: -0.007958984374999988,
20: -0.0077499999999999835,
}
test_path = copy_session_asset(TEST_ASSETS_ROOT / "example_sb3_agent_session")
loaded_agent = SB3Agent(session_path=test_path)
@@ -82,7 +84,7 @@ def test_load_sb3_session():
)
# run is seeded so should have the expected learn value
assert learn_mean_rewards == expected_learn_mean_reward_per_episode
assert learn_mean_rewards == sb3_expected_avg_reward_per_episode
# run an evaluation
loaded_agent.evaluate()
@@ -96,29 +98,14 @@ def test_load_sb3_session():
assert len(set(eval_mean_reward.values())) == 1
# the evaluation should be the same as a previous run
assert next(iter(set(eval_mean_reward.values()))) == -0.009896484374999988
assert next(iter(set(eval_mean_reward.values()))) == sb3_expected_eval_rewards
# delete the test directory
shutil.rmtree(test_path)
@pytest.mark.xfail(reason="Temporarily don't worry about this not working")
def test_load_primaite_session():
"""Test that loading a Primaite session works."""
expected_learn_mean_reward_per_episode = {
10: 0,
11: -0.008037109374999995,
12: -0.007978515624999988,
13: -0.008191406249999991,
14: -0.00817578124999999,
15: -0.008085937499999998,
16: -0.007837890624999982,
17: -0.007798828124999992,
18: -0.007777343749999998,
19: -0.007958984374999988,
20: -0.0077499999999999835,
}
test_path = copy_session_asset(TEST_ASSETS_ROOT / "example_sb3_agent_session")
# create loaded session
@@ -143,7 +130,7 @@ def test_load_primaite_session():
)
# run is seeded so should have the expected learn value
assert learn_mean_rewards == expected_learn_mean_reward_per_episode
assert learn_mean_rewards == sb3_expected_avg_reward_per_episode
# run an evaluation
session.evaluate()
@@ -157,29 +144,14 @@ def test_load_primaite_session():
assert len(set(eval_mean_reward.values())) == 1
# the evaluation should be the same as a previous run
assert next(iter(set(eval_mean_reward.values()))) == -0.009896484374999988
assert next(iter(set(eval_mean_reward.values()))) == sb3_expected_eval_rewards
# delete the test directory
shutil.rmtree(test_path)
@pytest.mark.xfail(reason="Temporarily don't worry about this not working")
def test_run_loading():
"""Test loading session via main.run."""
expected_learn_mean_reward_per_episode = {
10: 0,
11: -0.008037109374999995,
12: -0.007978515624999988,
13: -0.008191406249999991,
14: -0.00817578124999999,
15: -0.008085937499999998,
16: -0.007837890624999982,
17: -0.007798828124999992,
18: -0.007777343749999998,
19: -0.007958984374999988,
20: -0.0077499999999999835,
}
test_path = copy_session_asset(TEST_ASSETS_ROOT / "example_sb3_agent_session")
# create loaded session
@@ -190,7 +162,26 @@ def test_run_loading():
)
# run is seeded so should have the expected learn value
assert learn_mean_rewards == expected_learn_mean_reward_per_episode
assert learn_mean_rewards == sb3_expected_avg_reward_per_episode
# delete the test directory
shutil.rmtree(test_path)
def test_cli():
"""Test loading session via CLI."""
test_path = copy_session_asset(TEST_ASSETS_ROOT / "example_sb3_agent_session")
result = runner.invoke(app, ["session", "--load", test_path])
# cli should work
assert result.exit_code == 0
learn_mean_rewards = av_rewards_dict(
next(Path(test_path).rglob("**/learning/average_reward_per_episode_*.csv"), None)
)
# run is seeded so should have the expected learn value
assert learn_mean_rewards == sb3_expected_avg_reward_per_episode
# delete the test directory
shutil.rmtree(test_path)