#901 - Dropped temp_primaite_sessiion_2 from conftest.py.
- Re-added the hard-coded mean rewards per episode values from a rpe-trained agent to the deterministic test in test_seeding_and_deterministic_session.py - Partially tidies up some tests in test_observation_space.py; Still some work to be done on this at a later date.
This commit is contained in:
@@ -35,7 +35,7 @@ random_red_agent: False
|
||||
# Default is None (null)
|
||||
seed: null
|
||||
|
||||
# Set whether the agent will be deterministic instead of stochastic
|
||||
# Set whether the agent evaluation will be deterministic instead of stochastic
|
||||
# Options are:
|
||||
# True
|
||||
# False
|
||||
|
||||
@@ -35,7 +35,7 @@ random_red_agent: False
|
||||
# Default is None (null)
|
||||
seed: None
|
||||
|
||||
# Set whether the agent will be deterministic instead of stochastic
|
||||
# Set whether the agent evaluation will be deterministic instead of stochastic
|
||||
# Options are:
|
||||
# True
|
||||
# False
|
||||
|
||||
@@ -35,7 +35,7 @@ random_red_agent: False
|
||||
# Default is None (null)
|
||||
seed: 67890
|
||||
|
||||
# Set whether the agent will be deterministic instead of stochastic
|
||||
# Set whether the agent evaluation will be deterministic instead of stochastic
|
||||
# Options are:
|
||||
# True
|
||||
# False
|
||||
@@ -66,7 +66,7 @@ num_train_episodes: 10
|
||||
num_train_steps: 256
|
||||
|
||||
# Number of episodes to run per session
|
||||
num_eval_episodes: 1
|
||||
num_eval_episodes: 5
|
||||
|
||||
# Number of time_steps per episode
|
||||
num_eval_steps: 256
|
||||
|
||||
@@ -119,60 +119,6 @@ def temp_primaite_session(request):
|
||||
return TempPrimaiteSession(training_config_path, lay_down_config_path)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_primaite_session_2(request):
|
||||
"""
|
||||
Provides a temporary PrimaiteSession instance.
|
||||
|
||||
It's temporary as it uses a temporary directory as the session path.
|
||||
|
||||
To use this fixture you need to:
|
||||
|
||||
- parametrize your test function with:
|
||||
|
||||
- "temp_primaite_session"
|
||||
- [[path to training config, path to lay down config]]
|
||||
- Include the temp_primaite_session fixture as a param in your test
|
||||
function.
|
||||
- use the temp_primaite_session as a context manager assigning is the
|
||||
name 'session'.
|
||||
|
||||
.. code:: python
|
||||
|
||||
from primaite.config.lay_down_config import dos_very_basic_config_path
|
||||
from primaite.config.training_config import main_training_config_path
|
||||
@pytest.mark.parametrize(
|
||||
"temp_primaite_session",
|
||||
[
|
||||
[main_training_config_path(), dos_very_basic_config_path()]
|
||||
],
|
||||
indirect=True
|
||||
)
|
||||
def test_primaite_session(temp_primaite_session):
|
||||
with temp_primaite_session as session:
|
||||
# Learning outputs are saved in session.learning_path
|
||||
session.learn()
|
||||
|
||||
# Evaluation outputs are saved in session.evaluation_path
|
||||
session.evaluate()
|
||||
|
||||
# To ensure that all files are written, you must call .close()
|
||||
session.close()
|
||||
|
||||
# If you need to inspect any session outputs, it must be done
|
||||
# inside the context manager
|
||||
|
||||
# Now that we've exited the context manager, the
|
||||
# session.session_path directory and its contents are deleted
|
||||
"""
|
||||
training_config_path = request.param[0]
|
||||
lay_down_config_path = request.param[1]
|
||||
with patch("primaite.agents.agent.get_session_path", get_temp_session_path) as mck:
|
||||
mck.session_timestamp = datetime.now()
|
||||
|
||||
return TempPrimaiteSession(training_config_path, lay_down_config_path)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_session_path() -> Path:
|
||||
"""
|
||||
|
||||
@@ -4,41 +4,9 @@ import numpy as np
|
||||
import pytest
|
||||
|
||||
from primaite.environment.observations import NodeLinkTable, NodeStatuses, ObservationsHandler
|
||||
from primaite.environment.primaite_env import Primaite
|
||||
from tests import TEST_CONFIG_ROOT
|
||||
|
||||
|
||||
def run_generic_set_actions(env: Primaite):
|
||||
"""Run against a generic agent with specified blue agent actions."""
|
||||
# Reset the environment at the start of the episode
|
||||
# env.reset()
|
||||
training_config = env.training_config
|
||||
for episode in range(0, training_config.num_train_episodes):
|
||||
for step in range(0, training_config.num_train_steps):
|
||||
# Send the observation space to the agent to get an action
|
||||
# TEMP - random action for now
|
||||
# action = env.blue_agent_action(obs)
|
||||
action = 0
|
||||
print("Episode:", episode, "\nStep:", step)
|
||||
if step == 2:
|
||||
# [1, 1, 2, 1, 1, 1, 1(position)]
|
||||
# NEED [1, 1, 1, 2, 1, 1, 1]
|
||||
# Creates an ACL rule
|
||||
# Allows traffic from server_1 to node_1 on port FTP
|
||||
action = 43
|
||||
elif step == 4:
|
||||
action = 96
|
||||
|
||||
# Run the simulation step on the live environment
|
||||
obs, reward, done, info = env.step(action)
|
||||
|
||||
# Break if done is True
|
||||
if done:
|
||||
break
|
||||
|
||||
return env
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"temp_primaite_session",
|
||||
[
|
||||
@@ -317,13 +285,9 @@ class TestAccessControlList:
|
||||
assert np.array_equal(obs, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2])
|
||||
|
||||
def test_observation_space_with_implicit_rule(self, temp_primaite_session):
|
||||
"""Test observation space is what is expected when an agent adds ACLs during an episode."""
|
||||
# Used to use env from test fixture but AtrributeError function object has no 'training_config'
|
||||
with temp_primaite_session as session:
|
||||
env = session.env
|
||||
env = run_generic_set_actions(env)
|
||||
obs = env.env_obs
|
||||
"""
|
||||
Test observation space is what is expected when an agent adds ACLs during an episode.
|
||||
|
||||
Observation space at the end of the episode.
|
||||
At the start of the episode, there is a single implicit Deny rule = 1,1,1,1,1,0
|
||||
(0 represents its initial position at top of ACL list)
|
||||
@@ -333,6 +297,38 @@ class TestAccessControlList:
|
||||
On Step 7, there is a second rule added at POSITION 1: 2,4,2,3,3,1
|
||||
THINK THE RULES SHOULD BE THE OTHER WAY AROUND IN THE CURRENT OBSERVATION
|
||||
"""
|
||||
# TODO: Refactor this at some point to build a custom ACL Hardcoded
|
||||
# Agent and then patch the AgentIdentifier Enum class so that it
|
||||
# has ACL_AGENT. This then allows us to set the agent identified in
|
||||
# the main config and is a bit cleaner.
|
||||
# Used to use env from test fixture but AtrributeError function object has no 'training_config'
|
||||
with temp_primaite_session as session:
|
||||
env = session.env
|
||||
|
||||
training_config = env.training_config
|
||||
for episode in range(0, training_config.num_train_episodes):
|
||||
for step in range(0, training_config.num_train_steps):
|
||||
# Send the observation space to the agent to get an action
|
||||
# TEMP - random action for now
|
||||
# action = env.blue_agent_action(obs)
|
||||
action = 0
|
||||
print("Episode:", episode, "\nStep:", step)
|
||||
if step == 2:
|
||||
# [1, 1, 2, 1, 1, 1, 1(position)]
|
||||
# NEED [1, 1, 1, 2, 1, 1, 1]
|
||||
# Creates an ACL rule
|
||||
# Allows traffic from server_1 to node_1 on port FTP
|
||||
action = 43
|
||||
elif step == 4:
|
||||
action = 96
|
||||
|
||||
# Run the simulation step on the live environment
|
||||
obs, reward, done, info = env.step(action)
|
||||
|
||||
# Break if done is True
|
||||
if done:
|
||||
break
|
||||
obs = env.env_obs
|
||||
print("what i am testing", obs)
|
||||
# acl rule 1
|
||||
# source is 1 should be 4
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import time
|
||||
|
||||
import pytest as pytest
|
||||
|
||||
from primaite.config.lay_down_config import dos_very_basic_config_path
|
||||
@@ -11,45 +9,45 @@ from tests import TEST_CONFIG_ROOT
|
||||
[[TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", dos_very_basic_config_path()]],
|
||||
indirect=True,
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"temp_primaite_session_2",
|
||||
[[TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", dos_very_basic_config_path()]],
|
||||
indirect=True,
|
||||
)
|
||||
def test_seeded_learning(temp_primaite_session, temp_primaite_session_2):
|
||||
"""Test running seeded learning produces the same output when ran twice."""
|
||||
def test_seeded_learning(temp_primaite_session):
|
||||
"""
|
||||
Test running seeded learning produces the same output when ran twice.
|
||||
|
||||
.. note::
|
||||
|
||||
If this is failing, the hard-coded expected_mean_reward_per_episode
|
||||
from a pre-trained agent will probably need to be updated. If the
|
||||
env changes and those changed how this agent is trained, chances are
|
||||
the mean rewards are going to be different.
|
||||
|
||||
Run the test, but print out the session.learn_av_reward_per_episode()
|
||||
before comparing it. Then copy the printed dict and replace the
|
||||
expected_mean_reward_per_episode with those values. The test should
|
||||
now work. If not, then you've got a bug :).
|
||||
"""
|
||||
expected_mean_reward_per_episode = {
|
||||
1: -90.703125,
|
||||
2: -91.15234375,
|
||||
3: -87.5,
|
||||
4: -92.2265625,
|
||||
5: -94.6875,
|
||||
6: -91.19140625,
|
||||
7: -88.984375,
|
||||
8: -88.3203125,
|
||||
9: -112.79296875,
|
||||
10: -100.01953125,
|
||||
1: -33.90625,
|
||||
2: -32.32421875,
|
||||
3: -25.234375,
|
||||
4: -30.15625,
|
||||
5: -27.1484375,
|
||||
6: -29.609375,
|
||||
7: -29.921875,
|
||||
8: -29.3359375,
|
||||
9: -28.046875,
|
||||
10: -27.24609375,
|
||||
}
|
||||
"""
|
||||
|
||||
with temp_primaite_session as session:
|
||||
assert session._training_config.seed == 67890, (
|
||||
"Expected output is based upon a agent that was trained with " "seed 67890"
|
||||
)
|
||||
assert (
|
||||
session._training_config.seed == 67890
|
||||
), "Expected output is based upon a agent that was trained with seed 67890"
|
||||
session.learn()
|
||||
actual_mean_reward_per_episode_run_1 = session.learn_av_reward_per_episode()
|
||||
time.sleep(2)
|
||||
with temp_primaite_session_2 as session:
|
||||
assert session._training_config.seed == 67890, (
|
||||
"Expected output is based upon a agent that was trained with " "seed 67890"
|
||||
)
|
||||
session.learn()
|
||||
actual_mean_reward_per_episode_run_2 = session.learn_av_reward_per_episode()
|
||||
|
||||
assert actual_mean_reward_per_episode_run_1 == actual_mean_reward_per_episode_run_2
|
||||
assert expected_mean_reward_per_episode == session.learn_av_reward_per_episode()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Inconsistent results. Needs someone with RL " "knowledge to investigate further.")
|
||||
@pytest.mark.skip(reason="Inconsistent results. Needs someone with RL knowledge to investigate further.")
|
||||
@pytest.mark.parametrize(
|
||||
"temp_primaite_session",
|
||||
[[TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", dos_very_basic_config_path()]],
|
||||
|
||||
@@ -58,6 +58,10 @@ def run_generic_set_actions(env: Primaite):
|
||||
)
|
||||
def test_single_action_space_is_valid(temp_primaite_session):
|
||||
"""Test single action space is valid."""
|
||||
# TODO: Refactor this at some point to build a custom ACL Hardcoded
|
||||
# Agent and then patch the AgentIdentifier Enum class so that it
|
||||
# has ACL_AGENT. This then allows us to set the agent identified in
|
||||
# the main config and is a bit cleaner.
|
||||
with temp_primaite_session as session:
|
||||
env = session.env
|
||||
|
||||
@@ -95,6 +99,10 @@ def test_single_action_space_is_valid(temp_primaite_session):
|
||||
)
|
||||
def test_agent_is_executing_actions_from_both_spaces(temp_primaite_session):
|
||||
"""Test to ensure the blue agent is carrying out both kinds of operations (NODE & ACL)."""
|
||||
# TODO: Refactor this at some point to build a custom ACL Hardcoded
|
||||
# Agent and then patch the AgentIdentifier Enum class so that it
|
||||
# has ACL_AGENT. This then allows us to set the agent identified in
|
||||
# the main config and is a bit cleaner.
|
||||
with temp_primaite_session as session:
|
||||
env = session.env
|
||||
# Run environment with specified fixed blue agent actions only
|
||||
|
||||
Reference in New Issue
Block a user