#901 - Dropped temp_primaite_session_2 from conftest.py.

- Re-added the hard-coded mean rewards per episode values from a pre-trained agent to the deterministic test in test_seeding_and_deterministic_session.py
- Partially tidies up some tests in test_observation_space.py; Still some work to be done on this at a later date.
This commit is contained in:
Chris McCarthy
2023-07-14 14:13:11 +01:00
parent e743b2380c
commit f9c7cafe87
7 changed files with 77 additions and 129 deletions

View File

@@ -35,7 +35,7 @@ random_red_agent: False
# Default is None (null)
seed: null
# Set whether the agent will be deterministic instead of stochastic
# Set whether the agent evaluation will be deterministic instead of stochastic
# Options are:
# True
# False

View File

@@ -35,7 +35,7 @@ random_red_agent: False
# Default is None (null)
seed: None
# Set whether the agent will be deterministic instead of stochastic
# Set whether the agent evaluation will be deterministic instead of stochastic
# Options are:
# True
# False

View File

@@ -35,7 +35,7 @@ random_red_agent: False
# Default is None (null)
seed: 67890
# Set whether the agent will be deterministic instead of stochastic
# Set whether the agent evaluation will be deterministic instead of stochastic
# Options are:
# True
# False
@@ -66,7 +66,7 @@ num_train_episodes: 10
num_train_steps: 256
# Number of episodes to run per session
num_eval_episodes: 1
num_eval_episodes: 5
# Number of time_steps per episode
num_eval_steps: 256

View File

@@ -119,60 +119,6 @@ def temp_primaite_session(request):
return TempPrimaiteSession(training_config_path, lay_down_config_path)
@pytest.fixture
def temp_primaite_session_2(request):
"""
Provides a temporary PrimaiteSession instance.
It's temporary as it uses a temporary directory as the session path.
To use this fixture you need to:
- parametrize your test function with:
- "temp_primaite_session"
- [[path to training config, path to lay down config]]
- Include the temp_primaite_session fixture as a param in your test
function.
- use the temp_primaite_session as a context manager assigning is the
name 'session'.
.. code:: python
from primaite.config.lay_down_config import dos_very_basic_config_path
from primaite.config.training_config import main_training_config_path
@pytest.mark.parametrize(
"temp_primaite_session",
[
[main_training_config_path(), dos_very_basic_config_path()]
],
indirect=True
)
def test_primaite_session(temp_primaite_session):
with temp_primaite_session as session:
# Learning outputs are saved in session.learning_path
session.learn()
# Evaluation outputs are saved in session.evaluation_path
session.evaluate()
# To ensure that all files are written, you must call .close()
session.close()
# If you need to inspect any session outputs, it must be done
# inside the context manager
# Now that we've exited the context manager, the
# session.session_path directory and its contents are deleted
"""
training_config_path = request.param[0]
lay_down_config_path = request.param[1]
with patch("primaite.agents.agent.get_session_path", get_temp_session_path) as mck:
mck.session_timestamp = datetime.now()
return TempPrimaiteSession(training_config_path, lay_down_config_path)
@pytest.fixture
def temp_session_path() -> Path:
"""

View File

@@ -4,41 +4,9 @@ import numpy as np
import pytest
from primaite.environment.observations import NodeLinkTable, NodeStatuses, ObservationsHandler
from primaite.environment.primaite_env import Primaite
from tests import TEST_CONFIG_ROOT
def run_generic_set_actions(env: Primaite):
"""Run against a generic agent with specified blue agent actions."""
# Reset the environment at the start of the episode
# env.reset()
training_config = env.training_config
for episode in range(0, training_config.num_train_episodes):
for step in range(0, training_config.num_train_steps):
# Send the observation space to the agent to get an action
# TEMP - random action for now
# action = env.blue_agent_action(obs)
action = 0
print("Episode:", episode, "\nStep:", step)
if step == 2:
# [1, 1, 2, 1, 1, 1, 1(position)]
# NEED [1, 1, 1, 2, 1, 1, 1]
# Creates an ACL rule
# Allows traffic from server_1 to node_1 on port FTP
action = 43
elif step == 4:
action = 96
# Run the simulation step on the live environment
obs, reward, done, info = env.step(action)
# Break if done is True
if done:
break
return env
@pytest.mark.parametrize(
"temp_primaite_session",
[
@@ -317,13 +285,9 @@ class TestAccessControlList:
assert np.array_equal(obs, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2])
def test_observation_space_with_implicit_rule(self, temp_primaite_session):
"""Test observation space is what is expected when an agent adds ACLs during an episode."""
# Used to use env from test fixture but AtrributeError function object has no 'training_config'
with temp_primaite_session as session:
env = session.env
env = run_generic_set_actions(env)
obs = env.env_obs
"""
Test observation space is what is expected when an agent adds ACLs during an episode.
Observation space at the end of the episode.
At the start of the episode, there is a single implicit Deny rule = 1,1,1,1,1,0
(0 represents its initial position at top of ACL list)
@@ -333,6 +297,38 @@ class TestAccessControlList:
On Step 7, there is a second rule added at POSITION 1: 2,4,2,3,3,1
THINK THE RULES SHOULD BE THE OTHER WAY AROUND IN THE CURRENT OBSERVATION
"""
# TODO: Refactor this at some point to build a custom ACL Hardcoded
# Agent and then patch the AgentIdentifier Enum class so that it
# has ACL_AGENT. This then allows us to set the agent identified in
# the main config and is a bit cleaner.
# Used to use env from test fixture but AtrributeError function object has no 'training_config'
with temp_primaite_session as session:
env = session.env
training_config = env.training_config
for episode in range(0, training_config.num_train_episodes):
for step in range(0, training_config.num_train_steps):
# Send the observation space to the agent to get an action
# TEMP - random action for now
# action = env.blue_agent_action(obs)
action = 0
print("Episode:", episode, "\nStep:", step)
if step == 2:
# [1, 1, 2, 1, 1, 1, 1(position)]
# NEED [1, 1, 1, 2, 1, 1, 1]
# Creates an ACL rule
# Allows traffic from server_1 to node_1 on port FTP
action = 43
elif step == 4:
action = 96
# Run the simulation step on the live environment
obs, reward, done, info = env.step(action)
# Break if done is True
if done:
break
obs = env.env_obs
print("what i am testing", obs)
# acl rule 1
# source is 1 should be 4

View File

@@ -1,5 +1,3 @@
import time
import pytest as pytest
from primaite.config.lay_down_config import dos_very_basic_config_path
@@ -11,45 +9,45 @@ from tests import TEST_CONFIG_ROOT
[[TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", dos_very_basic_config_path()]],
indirect=True,
)
@pytest.mark.parametrize(
"temp_primaite_session_2",
[[TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", dos_very_basic_config_path()]],
indirect=True,
)
def test_seeded_learning(temp_primaite_session, temp_primaite_session_2):
"""Test running seeded learning produces the same output when ran twice."""
def test_seeded_learning(temp_primaite_session):
"""
Test running seeded learning produces the same output when ran twice.
.. note::
If this is failing, the hard-coded expected_mean_reward_per_episode
from a pre-trained agent will probably need to be updated. If the
env changes and those changed how this agent is trained, chances are
the mean rewards are going to be different.
Run the test, but print out the session.learn_av_reward_per_episode()
before comparing it. Then copy the printed dict and replace the
expected_mean_reward_per_episode with those values. The test should
now work. If not, then you've got a bug :).
"""
expected_mean_reward_per_episode = {
1: -90.703125,
2: -91.15234375,
3: -87.5,
4: -92.2265625,
5: -94.6875,
6: -91.19140625,
7: -88.984375,
8: -88.3203125,
9: -112.79296875,
10: -100.01953125,
1: -33.90625,
2: -32.32421875,
3: -25.234375,
4: -30.15625,
5: -27.1484375,
6: -29.609375,
7: -29.921875,
8: -29.3359375,
9: -28.046875,
10: -27.24609375,
}
"""
with temp_primaite_session as session:
assert session._training_config.seed == 67890, (
"Expected output is based upon a agent that was trained with " "seed 67890"
)
assert (
session._training_config.seed == 67890
), "Expected output is based upon a agent that was trained with seed 67890"
session.learn()
actual_mean_reward_per_episode_run_1 = session.learn_av_reward_per_episode()
time.sleep(2)
with temp_primaite_session_2 as session:
assert session._training_config.seed == 67890, (
"Expected output is based upon a agent that was trained with " "seed 67890"
)
session.learn()
actual_mean_reward_per_episode_run_2 = session.learn_av_reward_per_episode()
assert actual_mean_reward_per_episode_run_1 == actual_mean_reward_per_episode_run_2
assert expected_mean_reward_per_episode == session.learn_av_reward_per_episode()
@pytest.mark.skip(reason="Inconsistent results. Needs someone with RL " "knowledge to investigate further.")
@pytest.mark.skip(reason="Inconsistent results. Needs someone with RL knowledge to investigate further.")
@pytest.mark.parametrize(
"temp_primaite_session",
[[TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", dos_very_basic_config_path()]],

View File

@@ -58,6 +58,10 @@ def run_generic_set_actions(env: Primaite):
)
def test_single_action_space_is_valid(temp_primaite_session):
"""Test single action space is valid."""
# TODO: Refactor this at some point to build a custom ACL Hardcoded
# Agent and then patch the AgentIdentifier Enum class so that it
# has ACL_AGENT. This then allows us to set the agent identified in
# the main config and is a bit cleaner.
with temp_primaite_session as session:
env = session.env
@@ -95,6 +99,10 @@ def test_single_action_space_is_valid(temp_primaite_session):
)
def test_agent_is_executing_actions_from_both_spaces(temp_primaite_session):
"""Test to ensure the blue agent is carrying out both kinds of operations (NODE & ACL)."""
# TODO: Refactor this at some point to build a custom ACL Hardcoded
# Agent and then patch the AgentIdentifier Enum class so that it
# has ACL_AGENT. This then allows us to set the agent identified in
# the main config and is a bit cleaner.
with temp_primaite_session as session:
env = session.env
# Run environment with specified fixed blue agent actions only