#1566 - Refactored the test_train_eval_episode_steps.py to use TempPrimaiteSession.

- Fixed all errors that were caused by fixing the above.
- Some tests still fail, these are for SS to fix.
- Dropped the old run_generic stuff from conftest.py
This commit is contained in:
Chris McCarthy
2023-07-07 15:50:14 +01:00
parent 8edb26a65c
commit 1ddfca6459
15 changed files with 69 additions and 279 deletions

View File

@@ -153,12 +153,11 @@ class AgentSessionABC(ABC):
metadata_dict = json.load(file)
metadata_dict["end_datetime"] = datetime.now().isoformat()
if not self.is_eval:
metadata_dict["learning"]["total_episodes"] = self._env.episode_count # noqa
metadata_dict["learning"]["total_episodes"] = self._env.actual_episode_count # noqa
metadata_dict["learning"]["total_time_steps"] = self._env.total_step_count # noqa
else:
metadata_dict["evaluation"]["total_episodes"] = self._env.episode_count # noqa
metadata_dict["evaluation"]["total_episodes"] = self._env.actual_episode_count # noqa
metadata_dict["evaluation"]["total_time_steps"] = self._env.total_step_count # noqa
filepath = self.session_path / "session_metadata.json"
@@ -209,10 +208,11 @@ class AgentSessionABC(ABC):
:param kwargs: Any agent-specific key-word args to be passed.
"""
self._env.set_as_eval() # noqa
self.is_eval = True
self._plot_av_reward_per_episode(learning_session=False)
_LOGGER.info("Finished evaluation")
if self._can_evaluate:
self._plot_av_reward_per_episode(learning_session=False)
self._update_session_metadata_file()
self.is_eval = True
_LOGGER.info("Finished evaluation")
@abstractmethod
def _get_latest_checkpoint(self):

View File

@@ -85,8 +85,12 @@ class RLlibAgent(AgentSessionABC):
metadata_dict = json.load(file)
metadata_dict["end_datetime"] = datetime.now().isoformat()
metadata_dict["total_episodes"] = self._current_result["episodes_total"]
metadata_dict["total_time_steps"] = self._current_result["timesteps_total"]
if not self.is_eval:
metadata_dict["learning"]["total_episodes"] = self._current_result["episodes_total"] # noqa
metadata_dict["learning"]["total_time_steps"] = self._current_result["timesteps_total"] # noqa
else:
metadata_dict["evaluation"]["total_episodes"] = self._current_result["episodes_total"] # noqa
metadata_dict["evaluation"]["total_time_steps"] = self._current_result["timesteps_total"] # noqa
filepath = self.session_path / "session_metadata.json"
_LOGGER.debug(f"Updating Session Metadata file: {filepath}")
@@ -150,7 +154,6 @@ class RLlibAgent(AgentSessionABC):
super().learn()
def evaluate(
self,
**kwargs,

View File

@@ -58,7 +58,7 @@ class SB3Agent(AgentSessionABC):
PPOMlp,
self._env,
verbose=self.sb3_output_verbose_level,
n_steps=self._training_config.num_eval_steps,
n_steps=self._training_config.num_train_steps,
tensorboard_log=str(self._tensorboard_log_path),
seed=self._training_config.seed,
)
@@ -93,7 +93,7 @@ class SB3Agent(AgentSessionABC):
for i in range(episodes):
self._agent.learn(total_timesteps=time_steps)
self._save_checkpoint()
self._env.reset()
self._env._write_av_reward_per_episode() # noqa
self.save()
self._env.close()
super().learn()
@@ -129,7 +129,7 @@ class SB3Agent(AgentSessionABC):
if isinstance(action, np.ndarray):
action = np.int64(action)
obs, rewards, done, info = self._env.step(action)
self._env.reset()
self._env._write_av_reward_per_episode() # noqa
self._env.close()
super().evaluate()

View File

@@ -68,7 +68,7 @@ num_train_episodes: 10
num_train_steps: 256
# Number of episodes for evaluation to run per session
num_eval_episodes: 10
num_eval_episodes: 1
# Number of time_steps for evaluation per episode
num_eval_steps: 256

View File

@@ -66,7 +66,7 @@ class TrainingConfig:
num_train_steps: int = 256
"The number of steps in an episode during an training session"
num_eval_episodes: int = 10
num_eval_episodes: int = 1
"The number of episodes to train over during an evaluation session"
num_eval_steps: int = 256
@@ -242,10 +242,10 @@ class TrainingConfig:
tc += f"{self.hard_coded_agent_view}, "
tc += f"{self.action_type}, "
tc += f"observation_space={self.observation_space}, "
if self.session_type.name == "TRAIN":
if self.session_type is SessionType.TRAIN:
tc += f"{self.num_train_episodes} episodes @ "
tc += f"{self.num_train_steps} steps"
elif self.session_type.name == "EVAL":
elif self.session_type is SessionType.EVAL:
tc += f"{self.num_eval_episodes} episodes @ "
tc += f"{self.num_eval_steps} steps"
else:

View File

@@ -261,6 +261,11 @@ class Primaite(Env):
self.total_step_count = 0
self.episode_steps = self.training_config.num_eval_steps
# Write one row of (actual_episode_count, average_reward) through
# self.episode_av_reward_writer.
def _write_av_reward_per_episode(self):
# Skip the write while actual_episode_count is still 0.
if self.actual_episode_count > 0:
# The row is a 2-tuple: (actual_episode_count, average_reward).
csv_data = self.actual_episode_count, self.average_reward
self.episode_av_reward_writer.write(csv_data)
def reset(self):
"""
AI Gym Reset function.
@@ -268,10 +273,7 @@ class Primaite(Env):
Returns:
Environment observation space (reset)
"""
if self.actual_episode_count > 0:
csv_data = self.actual_episode_count, self.average_reward
self.episode_av_reward_writer.write(csv_data)
self._write_av_reward_per_episode()
self.episode_count += 1
# Don't need to reset links, as they are cleared and recalculated every

View File

@@ -90,7 +90,6 @@ def calculate_reward_function(
f"Penalty of {ier_reward} was NOT applied."
)
)
return reward_value

View File

@@ -16,5 +16,6 @@ def av_rewards_dict(av_rewards_csv_file: Union[str, Path]) -> Dict[int, float]:
:param av_rewards_csv_file: The average rewards per episode csv file path.
:return: The average rewards per episode cdv as a dict.
"""
d = pl.read_csv(av_rewards_csv_file).to_dict()
return {v: d["Average Reward"][i] for i, v in enumerate(d["Episode"])}
df = pl.read_csv(av_rewards_csv_file).to_dict()
return {v: df["Average Reward"][i] for i, v in enumerate(df["Episode"])}

View File

@@ -18,11 +18,6 @@
- name: ftp
port: '21'
state: GOOD
- item_type: POSITION
positions:
- node: '1'
x_pos: 309
y_pos: 78
- item_type: RED_POL
id: '1'
start_step: 1

View File

@@ -22,17 +22,13 @@ agent_identifier: DUMMY
# "ACL"
# "ANY" node and acl actions
action_type: NODE
# Number of episodes for training to run per session
num_train_episodes: 10
# Number of time_steps for training per episode
num_train_steps: 256
# Number of episodes for evaluation to run per session
num_eval_episodes: 10
num_eval_episodes: 1
# Number of time_steps for evaluation per episode
num_eval_steps: 256
num_eval_steps: 15
# Time delay between steps (for generic agents)
time_delay: 1

View File

@@ -52,20 +52,20 @@ observation_space:
# Number of episodes for training to run per session
num_train_episodes: 30
num_train_episodes: 3
# Number of time_steps for training per episode
num_train_steps: 1
num_train_steps: 25
# Number of episodes for evaluation to run per session
num_eval_episodes: 10
num_eval_episodes: 1
# Number of time_steps for evaluation per episode
num_eval_steps: 10
num_eval_steps: 17
# Sets how often the agent will save a checkpoint (every n time episodes).
# Set to 0 if no checkpoints are required. Default is 10
checkpoint_every_n_episodes: 10
checkpoint_every_n_episodes: 0
# Time delay (milliseconds) between steps for CUSTOM agents.
time_delay: 5
@@ -74,7 +74,7 @@ time_delay: 5
# "TRAIN" (Trains an agent)
# "EVAL" (Evaluates an agent)
# "TRAIN_EVAL" (Trains then evaluates an agent)
session_type: EVAL
session_type: TRAIN_EVAL
# Environment config values
# The high value for the observation space

View File

@@ -1,153 +0,0 @@
# Training Config File
# Sets which agent algorithm framework will be used.
# Options are:
# "SB3" (Stable Baselines3)
# "RLLIB" (Ray RLlib)
# "CUSTOM" (Custom Agent)
agent_framework: SB3
# Sets which deep learning framework will be used (by RLlib ONLY).
# Default is TF (Tensorflow).
# Options are:
# "TF" (Tensorflow)
# TF2 (Tensorflow 2.X)
# TORCH (PyTorch)
deep_learning_framework: TF2
# Sets which Agent class will be used.
# Options are:
# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework)
# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework)
# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type)
# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type)
# "RANDOM" (primaite.agents.simple.RandomAgent)
# "DUMMY" (primaite.agents.simple.DummyAgent)
agent_identifier: PPO
# Sets whether Red Agent POL and IER is randomised.
# Options are:
# True
# False
random_red_agent: False
# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC.
# Options are:
# "BASIC" (The current observation space only)
# "FULL" (Full environment view with actions taken and reward feedback)
hard_coded_agent_view: FULL
# Sets How the Action Space is defined:
# "NODE"
# "ACL"
# "ANY" node and acl actions
action_type: NODE
# observation space
observation_space:
# flatten: true
components:
- name: NODE_LINK_TABLE
# - name: NODE_STATUSES
# - name: LINK_TRAFFIC_LEVELS
# Number of episodes for training to run per session
num_train_episodes: 30
# Number of time_steps for training per episode
num_train_steps: 1
# Number of episodes for evaluation to run per session
num_eval_episodes: 10
# Number of time_steps for evaluation per episode
num_eval_steps: 10
# Sets how often the agent will save a checkpoint (every n time episodes).
# Set to 0 if no checkpoints are required. Default is 10
checkpoint_every_n_episodes: 10
# Time delay (milliseconds) between steps for CUSTOM agents.
time_delay: 5
# Type of session to be run. Options are:
# "TRAIN" (Trains an agent)
# "EVAL" (Evaluates an agent)
# "TRAIN_EVAL" (Trains then evaluates an agent)
session_type: TRAIN
# Environment config values
# The high value for the observation space
observation_space_high_value: 1000000000
# The Stable Baselines3 learn/eval output verbosity level:
# Options are:
# "NONE" (No Output)
# "INFO" (Info Messages (such as devices and wrappers used))
# "DEBUG" (All Messages)
sb3_output_verbose_level: NONE
# Reward values
# Generic
all_ok: 0
# Node Hardware State
off_should_be_on: -10
off_should_be_resetting: -5
on_should_be_off: -2
on_should_be_resetting: -5
resetting_should_be_on: -5
resetting_should_be_off: -2
resetting: -3
# Node Software or Service State
good_should_be_patching: 2
good_should_be_compromised: 5
good_should_be_overwhelmed: 5
patching_should_be_good: -5
patching_should_be_compromised: 2
patching_should_be_overwhelmed: 2
patching: -3
compromised_should_be_good: -20
compromised_should_be_patching: -20
compromised_should_be_overwhelmed: -20
compromised: -20
overwhelmed_should_be_good: -20
overwhelmed_should_be_patching: -20
overwhelmed_should_be_compromised: -20
overwhelmed: -20
# Node File System State
good_should_be_repairing: 2
good_should_be_restoring: 2
good_should_be_corrupt: 5
good_should_be_destroyed: 10
repairing_should_be_good: -5
repairing_should_be_restoring: 2
repairing_should_be_corrupt: 2
repairing_should_be_destroyed: 0
repairing: -3
restoring_should_be_good: -10
restoring_should_be_repairing: -2
restoring_should_be_corrupt: 1
restoring_should_be_destroyed: 2
restoring: -6
corrupt_should_be_good: -10
corrupt_should_be_repairing: -10
corrupt_should_be_restoring: -10
corrupt_should_be_destroyed: 2
corrupt: -10
destroyed_should_be_good: -20
destroyed_should_be_repairing: -20
destroyed_should_be_restoring: -20
destroyed_should_be_corrupt: -20
destroyed: -20
scanning: -2
# IER status
red_ier_running: -5
green_ier_blocked: -10
# Patching / Reset durations
os_patching_duration: 5 # The time taken to patch the OS
node_reset_duration: 5 # The time taken to reset a node (hardware)
service_patching_duration: 5 # The time taken to patch a service
file_system_repairing_limit: 5 # The time taken to repair the file system
file_system_restoring_limit: 5 # The time taken to restore the file system
file_system_scanning_limit: 5 # The time taken to scan the file system

View File

@@ -1,17 +1,16 @@
# Crown Copyright (C) Dstl 2022. DEFCON 703. Shared in confidence.
import datetime
import json
import shutil
import tempfile
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, Union
from typing import Any, Dict, Union
from unittest.mock import patch
import pytest
from primaite import getLogger
from primaite.common.enums import AgentIdentifier
from primaite.environment.primaite_env import Primaite
from primaite.primaite_session import PrimaiteSession
from primaite.utils.session_output_reader import av_rewards_dict
@@ -48,6 +47,11 @@ class TempPrimaiteSession(PrimaiteSession):
csv_file = f"average_reward_per_episode_{self.timestamp_str}.csv"
return av_rewards_dict(self.evaluation_path / csv_file)
def metadata_file_as_dict(self) -> Dict[str, Any]:
    """
    Read the session_metadata.json file and return it as a dict.

    The file is looked up directly inside ``self.session_path``.

    :return: The parsed JSON content of ``session_metadata.json``.
    :raises FileNotFoundError: If the metadata file does not exist.
    :raises json.JSONDecodeError: If the file does not contain valid JSON.
    """
    metadata_path = self.session_path / "session_metadata.json"
    # Pin the encoding so the result does not depend on the platform's
    # locale default; JSON text is UTF-8.
    with open(metadata_path, encoding="utf-8") as file:
        return json.load(file)
@property
def env(self) -> Primaite:
"""Direct access to the env for ease of testing."""
@@ -58,6 +62,7 @@ class TempPrimaiteSession(PrimaiteSession):
# Context-manager exit: delete the temporary session directories from disk.
# The three parameters are the standard exception triple passed to __exit__;
# none of them is used here.
# NOTE(review): the parameter name `type` shadows the builtin inside this method.
def __exit__(self, type, value, tb):
# Remove the session directory tree first ...
shutil.rmtree(self.session_path)
# ... then the directory tree of its parent as well.
shutil.rmtree(self.session_path.parent)
_LOGGER.debug(f"Deleted temp session directory: {self.session_path}")
@@ -129,59 +134,3 @@ def temp_session_path() -> Path:
session_path.mkdir(exist_ok=True, parents=True)
return session_path
# Build a Primaite environment from a training config and a lay down config.
# `temp_session_path` is called with the session timestamp and its result is
# used as the environment's session path.
def _get_primaite_env_from_config(
training_config_path: Union[str, Path],
lay_down_config_path: Union[str, Path],
temp_session_path,
):
"""Takes a config path and returns the created instance of Primaite."""
# A single timestamp drives both the session path and the timestamp string.
session_timestamp: datetime = datetime.now()
session_path = temp_session_path(session_timestamp)
timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
env = Primaite(
training_config_path=training_config_path,
lay_down_config_path=lay_down_config_path,
session_path=session_path,
timestamp_str=timestamp_str,
)
config_values = env.training_config
# Copy the env's episode step count onto the training config object.
config_values.num_steps = env.episode_steps
# TODO: This needs to be refactored to happen outside. Should be part of
# a main Session class.
# Only the RANDOM agent identifier triggers a run before the env is returned.
if env.training_config.agent_identifier is AgentIdentifier.RANDOM:
run_generic(env, config_values)
return env
def run_generic(env, config_values):
    """
    Run against a generic agent.

    Sends the constant action ``0`` to ``env.step`` for up to
    ``num_train_steps`` steps in each of ``num_train_episodes`` episodes.
    An episode ends early as soon as ``env.step`` reports ``done``. The
    environment is neither reset nor closed by this function.

    :param env: The environment to drive. Only ``env.step(action)`` is
        called; it must return an ``(obs, reward, done, info)`` 4-tuple.
    :param config_values: Object providing ``num_train_episodes``,
        ``num_train_steps`` and ``time_delay`` (milliseconds between steps).
    """
    # TEMP - fixed action for now, until a real agent chooses the action.
    action = 0

    for _ in range(config_values.num_train_episodes):
        for _ in range(config_values.num_train_steps):
            # Run the simulation step on the live environment; only the
            # ``done`` flag is needed here.
            _, _, done, _ = env.step(action)

            if done:
                break

            # time_delay is in milliseconds; time.sleep expects seconds.
            time.sleep(config_values.time_delay / 1000)

View File

@@ -1,7 +1,10 @@
import pytest
from primaite import getLogger
from tests import TEST_CONFIG_ROOT
_LOGGER = getLogger(__name__)
@pytest.mark.parametrize(
"temp_primaite_session",
@@ -44,7 +47,6 @@ def test_rewards_are_being_penalised_at_each_step_function(
Average Reward: -8 (-120 / 15)
"""
with temp_primaite_session as session:
session.evaluate()
session.close()
ev_rewards = session.eval_av_reward_per_episode_csv()
assert ev_rewards[1] == -8.0

View File

@@ -3,7 +3,6 @@ import pytest
from primaite import getLogger
from primaite.config.lay_down_config import dos_very_basic_config_path
from tests import TEST_CONFIG_ROOT
from tests.conftest import run_generic
_LOGGER = getLogger(__name__)
@@ -14,33 +13,30 @@ _LOGGER = getLogger(__name__)
indirect=True,
)
def test_eval_steps_differ_from_training(temp_primaite_session):
"""Uses PrimaiteSession class to compare number of episodes used for training and evaluation."""
with temp_primaite_session as train_session:
env = train_session.env
train_session.learn()
"""Uses PrimaiteSession class to compare number of episodes used for training and evaluation.
"""
Train_episode_step.yaml main config:
num_train_steps = 1
num_eval_steps = 10
When the YAML file changes from TRAIN to EVAL the episode step changes and uses the other config value.
The test is showing that 10 steps are running for evaluation and NOT 1 step as EVAL has been selected in the config.
num_train_steps = 25
num_train_episodes = 3
num_eval_steps = 17
num_eval_episodes = 1
"""
assert env.episode_steps == 10 # 30
# assert env.actual_episode_count == 10 # should be 10
expected_learning_metadata = {"total_episodes": 3, "total_time_steps": 75}
expected_evaluation_metadata = {"total_episodes": 1, "total_time_steps": 17}
with temp_primaite_session as session:
# Run learning and check episode and step counts
session.learn()
assert session.env.actual_episode_count == expected_learning_metadata["total_episodes"]
assert session.env.total_step_count == expected_learning_metadata["total_time_steps"]
@pytest.mark.parametrize(
"temp_primaite_session",
[[TEST_CONFIG_ROOT / "train_episode_step.yaml", dos_very_basic_config_path()]],
indirect=True,
)
def test_train_eval_config_option(temp_primaite_session):
"""Uses PrimaiteSession class to test number of episodes and steps used for TRAIN and EVAL option."""
with temp_primaite_session as train_session:
env = train_session.env
run_generic(env, env.training_config)
# Run evaluation and check episode and step counts
session.evaluate()
assert session.env.actual_episode_count == expected_evaluation_metadata["total_episodes"]
assert session.env.total_step_count == expected_evaluation_metadata["total_time_steps"]
print(env.actual_episode_count, env.step_count, env.total_step_count)
# Load the session_metadata.json file and check that the both the
# learning and evaluation match what is expected above
metadata = session.metadata_file_as_dict()
assert metadata["learning"] == expected_learning_metadata
assert metadata["evaluation"] == expected_evaluation_metadata