diff --git a/.gitignore b/.gitignore index b65d1fd8..4bb700b2 100644 --- a/.gitignore +++ b/.gitignore @@ -138,4 +138,8 @@ dmypy.json # Cython debug symbols cython_debug/ +# IDE .idea/ + +# outputs +src/primaite/outputs/ diff --git a/README.md b/README.md index 78f36fba..f7c6efd7 100644 --- a/README.md +++ b/README.md @@ -1 +1,64 @@ # PrimAITE + +## Getting Started with PrimAITE + +### Pre-Requisites + +In order to get **PrimAITE** installed, you will need to have the following installed: + +- `python3.8+` +- `python3-pip` +- `virtualenv` + +**PrimAITE** is designed to be OS-agnostic, and thus should work on most variations/distros of Linux, Windows, and MacOS. + +### Installation from source +#### 1. Navigate to the PrimAITE folder and create a new python virtual environment (venv) + +```unix +python3 -m venv .venv +``` + +#### 2. Activate the venv + +##### Unix +```bash +source .venv/bin/activate +``` + +##### Windows +```powershell +.\.venv\Scripts\activate +``` + +#### 3. Install `primaite` into the venv along with all of its dependencies + +```bash +python3 -m pip install -e . +``` + +### Development Installation +To install the development dependencies, postfix the command in step 3 above with the `[dev]` extra. Example: + +```bash +python3 -m pip install -e .[dev] +``` + +## Building documentation +The PrimAITE documentation can be built with the following commands: + +##### Unix +```bash +cd docs +make html +``` + +##### Windows +```powershell +cd docs +.\make.bat html +``` + +This will build the documentation as a collection of HTML files which uses the Read The Docs sphinx theme. Other build +options are available but may require additional dependencies such as LaTeX and PDF. Please refer to the Sphinx documentation +for your specific output requirements. 
diff --git a/docs/source/config.rst b/docs/source/config.rst index 164a75e1..c80baa3c 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -308,6 +308,14 @@ Rewards are calculated based on the difference between the current state and ref The number of steps to take when scanning the file system +* **deterministic** [bool] + + Set to true if the agent evaluation should be deterministic. Default is ``False`` + +* **seed** [int] + + Seed used in the randomisation in agent training. Default is ``None`` + The Lay Down Config ******************* diff --git a/src/primaite/agents/rllib.py b/src/primaite/agents/rllib.py index 427072c4..0bc41762 100644 --- a/src/primaite/agents/rllib.py +++ b/src/primaite/agents/rllib.py @@ -108,6 +108,7 @@ class RLlibAgent(AgentSessionABC): timestamp_str=self.timestamp_str, ), ) + self._agent_config.seed = self._training_config.seed self._agent_config.training(train_batch_size=self._training_config.num_steps) self._agent_config.framework(framework="tf") @@ -146,8 +147,12 @@ class RLlibAgent(AgentSessionABC): self._save_checkpoint() self.save() self._agent.stop() + super().learn() + # save agent + self.save() + def evaluate( self, **kwargs, diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py index 18e208e4..aa8e312d 100644 --- a/src/primaite/agents/sb3.py +++ b/src/primaite/agents/sb3.py @@ -59,6 +59,7 @@ class SB3Agent(AgentSessionABC): verbose=self.sb3_output_verbose_level, n_steps=self._training_config.num_steps, tensorboard_log=str(self._tensorboard_log_path), + seed=self._training_config.seed, ) def _save_checkpoint(self): @@ -96,22 +97,23 @@ class SB3Agent(AgentSessionABC): self._env.close() super().learn() + # save agent + self.save() + def evaluate( self, - deterministic: bool = True, **kwargs, ): """ Evaluate the agent. - :param deterministic: Whether the evaluation is deterministic. :param kwargs: Any agent-specific key-word args to be passed. 
""" time_steps = self._training_config.num_steps episodes = self._training_config.num_episodes self._env.set_as_eval() self.is_eval = True - if deterministic: + if self._training_config.deterministic: deterministic_str = "deterministic" else: deterministic_str = "non-deterministic" @@ -122,7 +124,7 @@ class SB3Agent(AgentSessionABC): obs = self._env.reset() for step in range(time_steps): - action, _states = self._agent.predict(obs, deterministic=deterministic) + action, _states = self._agent.predict(obs, deterministic=self._training_config.deterministic) if isinstance(action, np.ndarray): action = np.int64(action) obs, rewards, done, info = self._env.step(action) diff --git a/src/primaite/config/_package_data/training/training_config_main.yaml b/src/primaite/config/_package_data/training/training_config_main.yaml index 7e7f239d..15adc4dd 100644 --- a/src/primaite/config/_package_data/training/training_config_main.yaml +++ b/src/primaite/config/_package_data/training/training_config_main.yaml @@ -31,6 +31,16 @@ agent_identifier: PPO # False random_red_agent: False +# The (integer) seed to be used in random number generation +# Default is None (null) +seed: null + +# Set whether the agent will be deterministic instead of stochastic +# Options are: +# True +# False +deterministic: False + # Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. 
# Options are: # "BASIC" (The current observation space only) diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py index a89d8c4b..e7b701c7 100644 --- a/src/primaite/config/training_config.py +++ b/src/primaite/config/training_config.py @@ -178,6 +178,12 @@ class TrainingConfig: file_system_scanning_limit: int = 5 "The time taken to scan the file system" + deterministic: bool = False + "If true, the agent evaluation will be deterministic" + + seed: Optional[int] = None + "The random number generator seed to be used while training the agent" + @classmethod def from_dict(cls, config_dict: Dict[str, Union[str, int, bool]]) -> TrainingConfig: """ diff --git a/src/primaite/config/_package_data/training/training_config_random_red_agent.yaml b/tests/config/ppo_not_seeded_training_config.yaml similarity index 54% rename from src/primaite/config/_package_data/training/training_config_random_red_agent.yaml rename to tests/config/ppo_not_seeded_training_config.yaml index 1ccc7c38..23cff44e 100644 --- a/src/primaite/config/_package_data/training/training_config_random_red_agent.yaml +++ b/tests/config/ppo_not_seeded_training_config.yaml @@ -1,40 +1,94 @@ -# Main Config File +# Training Config File -# Generic config values -# Choose one of these (dependent on Agent being trained) -# "STABLE_BASELINES3_PPO" -# "STABLE_BASELINES3_A2C" -# "GENERIC" -agent_identifier: STABLE_BASELINES3_A2C +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. 
+# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO # Sets whether Red Agent POL and IER is randomised. # Options are: # True # False -random_red_agent: True +random_red_agent: False + +# The (integer) seed to be used in random number generation +# Default is None (null) +seed: None + +# Set whether the agent will be deterministic instead of stochastic +# Options are: +# True +# False +deterministic: False + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. +# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL # Sets How the Action Space is defined: # "NODE" # "ACL" # "ANY" node and acl actions action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS # Number of episodes to run per session num_episodes: 10 + # Number of time_steps per episode num_steps: 256 -# Time delay between steps (for generic agents) -time_delay: 10 -# Type of session to be run (TRAINING or EVALUATION) -session_type: TRAINING -# Determine whether to load an agent from file -load_agent: False -# File path and file name of agent if you're loading one in -agent_load_file: C:\[Path]\[agent_saved_filename.zip] + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. 
Default is 10 +checkpoint_every_n_episodes: 0 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: TRAIN_EVAL # Environment config values # The high value for the observation space observation_space_high_value: 1000000000 +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + # Reward values # Generic all_ok: 0.0000 diff --git a/tests/config/ppo_seeded_training_config.yaml b/tests/config/ppo_seeded_training_config.yaml new file mode 100644 index 00000000..181331d9 --- /dev/null +++ b/tests/config/ppo_seeded_training_config.yaml @@ -0,0 +1,155 @@ +# Training Config File + +# Sets which agent algorithm framework will be used. +# Options are: +# "SB3" (Stable Baselines3) +# "RLLIB" (Ray RLlib) +# "CUSTOM" (Custom Agent) +agent_framework: SB3 + +# Sets which deep learning framework will be used (by RLlib ONLY). +# Default is TF (Tensorflow). +# Options are: +# "TF" (Tensorflow) +# TF2 (Tensorflow 2.X) +# TORCH (PyTorch) +deep_learning_framework: TF2 + +# Sets which Agent class will be used. +# Options are: +# "A2C" (Advantage Actor Critic coupled with either SB3 or RLLIB agent_framework) +# "PPO" (Proximal Policy Optimization coupled with either SB3 or RLLIB agent_framework) +# "HARDCODED" (The HardCoded agents coupled with an ACL or NODE action_type) +# "DO_NOTHING" (The DoNothing agents coupled with an ACL or NODE action_type) +# "RANDOM" (primaite.agents.simple.RandomAgent) +# "DUMMY" (primaite.agents.simple.DummyAgent) +agent_identifier: PPO + +# Sets whether Red Agent POL and IER is randomised. 
+# Options are: +# True +# False +random_red_agent: False + +# The (integer) seed to be used in random number generation +# Default is None (null) +seed: 67890 + +# Set whether the agent will be deterministic instead of stochastic +# Options are: +# True +# False +deterministic: True + +# Sets what view of the environment the deterministic hardcoded agent has. The default is BASIC. +# Options are: +# "BASIC" (The current observation space only) +# "FULL" (Full environment view with actions taken and reward feedback) +hard_coded_agent_view: FULL + +# Sets How the Action Space is defined: +# "NODE" +# "ACL" +# "ANY" node and acl actions +action_type: NODE +# observation space +observation_space: + # flatten: true + components: + - name: NODE_LINK_TABLE + # - name: NODE_STATUSES + # - name: LINK_TRAFFIC_LEVELS +# Number of episodes to run per session +num_episodes: 10 + +# Number of time_steps per episode +num_steps: 256 + +# Sets how often the agent will save a checkpoint (every n time episodes). +# Set to 0 if no checkpoints are required. Default is 10 +checkpoint_every_n_episodes: 0 + +# Time delay (milliseconds) between steps for CUSTOM agents. +time_delay: 5 + +# Type of session to be run. 
Options are: +# "TRAIN" (Trains an agent) +# "EVAL" (Evaluates an agent) +# "TRAIN_EVAL" (Trains then evaluates an agent) +session_type: TRAIN_EVAL + +# Environment config values +# The high value for the observation space +observation_space_high_value: 1000000000 + +# The Stable Baselines3 learn/eval output verbosity level: +# Options are: +# "NONE" (No Output) +# "INFO" (Info Messages (such as devices and wrappers used)) +# "DEBUG" (All Messages) +sb3_output_verbose_level: NONE + +# Reward values +# Generic +all_ok: 0 +# Node Hardware State +off_should_be_on: -10 +off_should_be_resetting: -5 +on_should_be_off: -2 +on_should_be_resetting: -5 +resetting_should_be_on: -5 +resetting_should_be_off: -2 +resetting: -3 +# Node Software or Service State +good_should_be_patching: 2 +good_should_be_compromised: 5 +good_should_be_overwhelmed: 5 +patching_should_be_good: -5 +patching_should_be_compromised: 2 +patching_should_be_overwhelmed: 2 +patching: -3 +compromised_should_be_good: -20 +compromised_should_be_patching: -20 +compromised_should_be_overwhelmed: -20 +compromised: -20 +overwhelmed_should_be_good: -20 +overwhelmed_should_be_patching: -20 +overwhelmed_should_be_compromised: -20 +overwhelmed: -20 +# Node File System State +good_should_be_repairing: 2 +good_should_be_restoring: 2 +good_should_be_corrupt: 5 +good_should_be_destroyed: 10 +repairing_should_be_good: -5 +repairing_should_be_restoring: 2 +repairing_should_be_corrupt: 2 +repairing_should_be_destroyed: 0 +repairing: -3 +restoring_should_be_good: -10 +restoring_should_be_repairing: -2 +restoring_should_be_corrupt: 1 +restoring_should_be_destroyed: 2 +restoring: -6 +corrupt_should_be_good: -10 +corrupt_should_be_repairing: -10 +corrupt_should_be_restoring: -10 +corrupt_should_be_destroyed: 2 +corrupt: -10 +destroyed_should_be_good: -20 +destroyed_should_be_repairing: -20 +destroyed_should_be_restoring: -20 +destroyed_should_be_corrupt: -20 +destroyed: -20 +scanning: -2 +# IER status +red_ier_running: -5 
+green_ier_blocked: -10 + +# Patching / Reset durations +os_patching_duration: 5 # The time taken to patch the OS +node_reset_duration: 5 # The time taken to reset a node (hardware) +service_patching_duration: 5 # The time taken to patch a service +file_system_repairing_limit: 5 # The time taken to repair the file system +file_system_restoring_limit: 5 # The time taken to restore the file system +file_system_scanning_limit: 5 # The time taken to scan the file system diff --git a/tests/conftest.py b/tests/conftest.py index af76b314..388bc034 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -58,7 +58,6 @@ class TempPrimaiteSession(PrimaiteSession): def __exit__(self, type, value, tb): shutil.rmtree(self.session_path) - shutil.rmtree(self.session_path.parent) _LOGGER.debug(f"Deleted temp session directory: {self.session_path}") diff --git a/tests/mock_and_patch/get_session_path_mock.py b/tests/mock_and_patch/get_session_path_mock.py index feff52f6..90c0cb5d 100644 --- a/tests/mock_and_patch/get_session_path_mock.py +++ b/tests/mock_and_patch/get_session_path_mock.py @@ -1,6 +1,7 @@ import tempfile from datetime import datetime from pathlib import Path +from uuid import uuid4 from primaite import getLogger @@ -14,9 +15,7 @@ def get_temp_session_path(session_timestamp: datetime) -> Path: :param session_timestamp: This is the datetime that the session started. :return: The session directory path. 
""" - date_dir = session_timestamp.strftime("%Y-%m-%d") - session_path = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S") - session_path = Path(tempfile.gettempdir()) / "primaite" / date_dir / session_path + session_path = Path(tempfile.gettempdir()) / "primaite" / str(uuid4()) session_path.mkdir(exist_ok=True, parents=True) _LOGGER.debug(f"Created temp session directory: {session_path}") return session_path diff --git a/tests/test_seeding_and_deterministic_session.py b/tests/test_seeding_and_deterministic_session.py new file mode 100644 index 00000000..34cb43fb --- /dev/null +++ b/tests/test_seeding_and_deterministic_session.py @@ -0,0 +1,49 @@ +import pytest as pytest + +from primaite.config.lay_down_config import dos_very_basic_config_path +from tests import TEST_CONFIG_ROOT + + +@pytest.mark.parametrize( + "temp_primaite_session", + [[TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", dos_very_basic_config_path()]], + indirect=True, +) +def test_seeded_learning(temp_primaite_session): + """Test running seeded learning produces the same output when ran twice.""" + expected_mean_reward_per_episode = { + 1: -90.703125, + 2: -91.15234375, + 3: -87.5, + 4: -92.2265625, + 5: -94.6875, + 6: -91.19140625, + 7: -88.984375, + 8: -88.3203125, + 9: -112.79296875, + 10: -100.01953125, + } + with temp_primaite_session as session: + assert session._training_config.seed == 67890, ( + "Expected output is based upon a agent that was trained with " "seed 67890" + ) + session.learn() + actual_mean_reward_per_episode = session.learn_av_reward_per_episode() + + assert actual_mean_reward_per_episode == expected_mean_reward_per_episode + + +@pytest.mark.skip(reason="Inconsistent results. 
Needs someone with RL " "knowledge to investigate further.") +@pytest.mark.parametrize( + "temp_primaite_session", + [[TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", dos_very_basic_config_path()]], + indirect=True, +) +def test_deterministic_evaluation(temp_primaite_session): + """Test running deterministic evaluation gives the same average reward per episode.""" + with temp_primaite_session as session: + # train the agent, then evaluate it + session.learn() + session.evaluate() + eval_mean_reward = session.eval_av_reward_per_episode_csv() + assert len(set(eval_mean_reward.values())) == 1