901 - changed ACL current obs from list to numpy.array, changed default ACL list in training_config.py to FALSE, and tried to make test_seeding_and_deterministic_session.py test without fixed reward results

This commit is contained in:
SunilSamra
2023-07-13 11:04:11 +01:00
parent 06c20f6984
commit f8cb18c654
7 changed files with 37 additions and 28 deletions

View File

@@ -94,13 +94,13 @@ class TrainingConfig:
"Stable Baselines3 learn/eval output verbosity level"
# Access Control List/Rules
apply_implicit_rule: str = True
apply_implicit_rule: str = False
"User choice to have Implicit ALLOW or DENY."
implicit_acl_rule: RulePermissionType = RulePermissionType.DENY
"ALLOW or DENY implicit firewall rule to go at the end of the ACL list."
max_number_acl_rules: int = 0
max_number_acl_rules: int = 10
"Sets a limit on the number of ACL rules allowed in the list and environment."
# Reward values

View File

@@ -519,30 +519,26 @@ class AccessControlList(AbstractObservationComponent):
port_int = self.env.ports_list.index(port) + 2
else:
_LOGGER.info(f"Port {port} could not be found.")
port_int = None
# Either do the multiply on the obs space
# Change the obs to
items_to_add = [
permission_int,
source_ip_int,
dest_ip_int,
protocol_int,
port_int,
position,
]
position = position * 6
for item in items_to_add:
# print("position", position, "\nitem", int(item))
obs.insert(position, int(item))
position += 1
obs.extend(
[
permission_int,
source_ip_int,
dest_ip_int,
protocol_int,
port_int,
position,
]
)
else:
starting_position = index * 6
for placeholder in range(6):
obs.insert(starting_position, 0)
starting_position += 1
obs.extend([0, 0, 0, 0, 0, 0])
# print("current obs", obs, "\n" ,len(obs))
self.current_observation = obs
self.current_observation[:] = obs
def generate_structure(self):
"""Return a list of labels for the components of the flattened observation space."""

View File

@@ -5,7 +5,8 @@
# "STABLE_BASELINES3_PPO"
# "STABLE_BASELINES3_A2C"
# "GENERIC"
agent_identifier: STABLE_BASELINES3_A2C
agent_framework: SB3
agent_identifier: PPO
# Sets How the Action Space is defined:
# "NODE"
# "ACL"
@@ -21,7 +22,7 @@ apply_implicit_rule: True
# Implicit ACL firewall rule at the end of the list to be the default action or no rule can be selected (ALLOW or DENY)
implicit_acl_rule: DENY
# Total number of ACL rules allowed in the environment
max_number_acl_rules: 10
max_number_acl_rules: 3
observation_space:
components:
@@ -31,7 +32,7 @@ observation_space:
time_delay: 1
# Type of session to be run (TRAINING or EVALUATION)
session_type: TRAINING
session_type: TRAIN
# Determine whether to load an agent from file
load_agent: False
# File path and file name of agent if you're loading one in

View File

@@ -39,6 +39,8 @@ agent_load_file: C:\[Path]\[agent_saved_filename.zip]
# The high value for the observation space
observation_space_high_value: 1000000000
# Choice whether to have an ALLOW or DENY implicit rule or not (TRUE or FALSE)
apply_implicit_rule: True
implicit_acl_rule: DENY
max_number_acl_rules: 10
# Reward values

View File

@@ -58,6 +58,7 @@ class TempPrimaiteSession(PrimaiteSession):
def __exit__(self, type, value, tb):
shutil.rmtree(self.session_path)
# shutil.rmtree(self.session_path.parent)
_LOGGER.debug(f"Deleted temp session directory: {self.session_path}")

View File

@@ -257,7 +257,7 @@ class TestLinkTrafficLevels:
"temp_primaite_session",
[
[
TEST_CONFIG_ROOT / "single_action_space_fixed_blue_actions_main_config.yaml",
TEST_CONFIG_ROOT / "obs_tests/main_config_ACCESS_CONTROL_LIST.yaml",
TEST_CONFIG_ROOT / "obs_tests/laydown_ACL.yaml",
]
],
@@ -273,7 +273,7 @@ class TestAccessControlList:
env.update_environent_obs()
# we have two ACLs
assert env.env_obs.shape == (6 * 3)
assert env.env_obs.shape == (18,)
def test_values(self, temp_primaite_session):
"""Test that traffic values are encoded correctly.
@@ -296,7 +296,7 @@ class TestAccessControlList:
# therefore the first and third elements should be 6 and all others 0
# (`7` corresponds to 100% utilisation and `6` corresponds to 87.5%-100%)
print(obs)
assert np.array_equal(obs, [])
assert np.array_equal(obs, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2])
def test_observation_space_with_implicit_rule(self, temp_primaite_session):
"""Test observation space is what is expected when an agent adds ACLs during an episode."""

View File

@@ -11,6 +11,7 @@ from tests import TEST_CONFIG_ROOT
)
def test_seeded_learning(temp_primaite_session):
"""Test running seeded learning produces the same output when ran twice."""
"""
expected_mean_reward_per_episode = {
1: -90.703125,
2: -91.15234375,
@@ -23,14 +24,22 @@ def test_seeded_learning(temp_primaite_session):
9: -112.79296875,
10: -100.01953125,
}
"""
with temp_primaite_session as session:
assert session._training_config.seed == 67890, (
"Expected output is based upon an agent that was trained with " "seed 67890"
)
session.learn()
actual_mean_reward_per_episode = session.learn_av_reward_per_episode()
actual_mean_reward_per_episode_run_1 = session.learn_av_reward_per_episode()
assert actual_mean_reward_per_episode == expected_mean_reward_per_episode
with temp_primaite_session as session:
assert session._training_config.seed == 67890, (
"Expected output is based upon an agent that was trained with " "seed 67890"
)
session.learn()
actual_mean_reward_per_episode_run_2 = session.learn_av_reward_per_episode()
assert actual_mean_reward_per_episode_run_1 == actual_mean_reward_per_episode_run_2
@pytest.mark.skip(reason="Inconsistent results. Needs someone with RL " "knowledge to investigate further.")