Merge remote-tracking branch 'origin/dev' into feature/1468-observations-class

2023-06-09 09:01:54 +01:00
parent 9417cd85ab 6d502045cb
commit b917b65d49
11 changed files with 643 additions and 47 deletions
--- a/docs/source/about.rst
+++ b/docs/source/about.rst
@@ -351,29 +351,40 @@ The action space available to the blue agent comes in two types:

 1. Node-based
 2. Access Control List
+ 3. Any (Agent can take both node-based and ACL-based actions)

 The choice of action space used during a training session is determined in the config_[name].yaml file.

 **Node-Based**

-The agent is able to influence the status of nodes by switching them off, resetting, or patching operating systems and services. In this instance, the action space is an OpenAI Gym multidiscrete type, as follows:
+The agent is able to influence the status of nodes by switching them off, resetting, or patching operating systems and services. In this instance, the action space is an OpenAI Gym spaces.Discrete type, as follows:

- * [0, num nodes] - Node ID (0 = nothing, node ID)
- * [0, 4] - What property it's acting on (0 = nothing, 1 = state, 2 = SoftwareState, 3 = service state, 4 = file system state)
- * [0, 3] - Action on property (0 = nothing, 1 = on / scan, 2 = off / repair, 3 = reset / patch / restore)
- * [0, num services] - Resolves to service ID (0 = nothing, resolves to service)
+ * Dictionary item {... ,1: [x1, x2, x3,x4] ...}
+   The placeholders inside the list under the key '1' mean the following:
+
+    * [0, num nodes] - Node ID (0 = nothing, node ID)
+    * [0, 4] - What property it's acting on (0 = nothing, 1 = state, 2 = SoftwareState, 3 = service state, 4 = file system state)
+    * [0, 3] - Action on property (0 = nothing, 1 = on / scan, 2 = off / repair, 3 = reset / patch / restore)
+    * [0, num services] - Resolves to service ID (0 = nothing, resolves to service)

 **Access Control List**

-The blue agent is able to influence the configuration of the Access Control List rule set (which implements a system-wide firewall). In this instance, the action space is an OpenAI multidiscrete type, as follows:
+The blue agent is able to influence the configuration of the Access Control List rule set (which implements a system-wide firewall). In this instance, the action space is an OpenAI spaces.Discrete type, as follows:

+   * Dictionary item {... ,1: [x1, x2, x3, x4, x5, x6] ...}
+   The placeholders inside the list under the key '1' mean the following:

- * [0, 2] - Action (0 = do nothing, 1 = create rule, 2 = delete rule)
- * [0, 1] - Permission (0 = DENY, 1 = ALLOW)
- * [0, num nodes] - Source IP (0 = any, then 1 -> x resolving to IP addresses)
- * [0, num nodes] - Dest IP (0 = any, then 1 -> x resolving to IP addresses)
- * [0, num services] - Protocol (0 = any, then 1 -> x resolving to protocol)
- * [0, num ports] - Port (0 = any, then 1 -> x resolving to port)
+     * [0, 2] - Action (0 = do nothing, 1 = create rule, 2 = delete rule)
+     * [0, 1] - Permission (0 = DENY, 1 = ALLOW)
+     * [0, num nodes] - Source IP (0 = any, then 1 -> x resolving to IP addresses)
+     * [0, num nodes] - Dest IP (0 = any, then 1 -> x resolving to IP addresses)
+     * [0, num services] - Protocol (0 = any, then 1 -> x resolving to protocol)
+     * [0, num ports] - Port (0 = any, then 1 -> x resolving to port)
+
+**ANY**
+The agent is able to carry out both **Node-Based** and **Access Control List** operations.
+
+This means the dictionary will contain key-value pairs in the format of BOTH Node-Based and Access Control List as seen above.

 Rewards
 *******
--- a/src/primaite/agents/init.py
+++ b/src/primaite/agents/init.py
--- a/src/primaite/agents/utils.py
+++ b/src/primaite/agents/utils.py
@@ -0,0 +1,127 @@
+from primaite.common.enums import NodeHardwareAction, NodePOLType, NodeSoftwareAction
+
+
+def transform_action_node_readable(action):
+    """
+    Convert a node action from enumerated format to readable format.
+
+    example:
+    [1, 3, 1, 0] -> [1, 'SERVICE', 'PATCHING', 0]
+    """
+    action_node_property = NodePOLType(action[1]).name
+
+    if action_node_property == "OPERATING":
+        property_action = NodeHardwareAction(action[2]).name
+    elif (action_node_property == "OS" or action_node_property == "SERVICE") and action[
+        2
+    ] <= 1:
+        property_action = NodeSoftwareAction(action[2]).name
+    else:
+        property_action = "NONE"
+
+    new_action = [action[0], action_node_property, property_action, action[3]]
+    return new_action
+
+
+def transform_action_acl_readable(action):
+    """
+    Transform an ACL action to a more readable format.
+
+    example:
+    [0, 1, 2, 5, 0, 1] -> ['NONE', 'ALLOW', 2, 5, 'ANY', 1]
+    """
+    action_decisions = {0: "NONE", 1: "CREATE", 2: "DELETE"}
+    action_permissions = {0: "DENY", 1: "ALLOW"}
+
+    action_decision = action_decisions[action[0]]
+    action_permission = action_permissions[action[1]]
+
+    # For IPs, Ports and Protocols, 0 means any, otherwise its just an index
+    new_action = [action_decision, action_permission] + list(action[2:6])
+    for n, val in enumerate(list(action[2:6])):
+        if val == 0:
+            new_action[n + 2] = "ANY"
+
+    return new_action
+
+
+def is_valid_node_action(action):
+    """Is the node action an actual valid action.
+
+    Only uses information about the action to determine if the action has an effect
+
+    Does NOT consider:
+    - Node ID not valid to perform an operation - e.g. selected node has no service so cannot patch
+    - Node already being in that state (turning an ON node ON)
+    """
+    action_r = transform_action_node_readable(action)
+
+    node_property = action_r[1]
+    node_action = action_r[2]
+
+    # print("node property", node_property, "\nnode action", node_action)
+
+    if node_property == "NONE":
+        return False
+    if node_action == "NONE":
+        return False
+    if node_property == "OPERATING" and node_action == "PATCHING":
+        # Operating State cannot PATCH
+        return False
+    if node_property != "OPERATING" and node_action not in ["NONE", "PATCHING"]:
+        # Software States can only do Nothing or Patch
+        return False
+    return True
+
+
+def is_valid_acl_action(action):
+    """
+    Is the ACL action an actual valid action.
+
+    Only uses information about the action to determine if the action has an effect.
+
+    Does NOT consider:
+        - Trying to create identical rules
+        - Trying to create a rule which is a subset of another rule (caused by "ANY")
+    """
+    action_r = transform_action_acl_readable(action)
+
+    action_decision = action_r[0]
+    action_permission = action_r[1]
+    action_source_id = action_r[2]
+    action_destination_id = action_r[3]
+
+    if action_decision == "NONE":
+        return False
+    if (
+        action_source_id == action_destination_id
+        and action_source_id != "ANY"
+        and action_destination_id != "ANY"
+    ):
+        # ACL rule towards itself
+        return False
+    if action_permission == "DENY":
+        # DENY is unnecessary, we can create and delete allow rules instead
+        # No allow rule = blocked/DENY by feault. ALLOW overrides existing DENY.
+        return False
+
+    return True
+
+
+def is_valid_acl_action_extra(action):
+    """Harsher version of valid acl actions, does not allow action."""
+    if is_valid_acl_action(action) is False:
+        return False
+
+    action_r = transform_action_acl_readable(action)
+    action_protocol = action_r[4]
+    action_port = action_r[5]
+
+    # Don't allow protocols or ports to be ANY
+    # in the future we might want to do the opposite, and only have ANY option for ports and service
+    if action_protocol == "ANY":
+        return False
+    if action_port == "ANY":
+        return False
+
+    return True
--- a/src/primaite/common/enums.py
+++ b/src/primaite/common/enums.py
@@ -49,6 +49,7 @@ class SoftwareState(Enum):
 class NodePOLType(Enum):
    """Node Pattern of Life type enumeration."""

+    NONE = 0
    OPERATING = 1
    OS = 2
    SERVICE = 3
@@ -81,6 +82,7 @@ class ActionType(Enum):

    NODE = 0
    ACL = 1
+    ANY = 2


 class ObservationType(Enum):
@@ -98,3 +100,29 @@ class FileSystemState(Enum):
    DESTROYED = 3
    REPAIRING = 4
    RESTORING = 5
+
+
+class NodeHardwareAction(Enum):
+    """Node hardware action."""
+
+    NONE = 0
+    ON = 1
+    OFF = 2
+    RESET = 3
+
+
+class NodeSoftwareAction(Enum):
+    """Node software action."""
+
+    NONE = 0
+    PATCHING = 1
+
+
+class LinkStatus(Enum):
+    """Link traffic status."""
+
+    NONE = 0
+    LOW = 1
+    MEDIUM = 2
+    HIGH = 3
+    OVERLOAD = 4
--- a/src/primaite/environment/primaite_env.py
+++ b/src/primaite/environment/primaite_env.py
@@ -15,6 +15,7 @@ from gym import Env, spaces
 from matplotlib import pyplot as plt

 from primaite.acl.access_control_list import AccessControlList
+from primaite.agents.utils import is_valid_acl_action_extra, is_valid_node_action
 from primaite.common.custom_typing import NodeUnion
 from primaite.common.enums import (
    ActionType,
@@ -42,6 +43,7 @@ from primaite.pol.red_agent_pol import apply_red_agent_iers, apply_red_agent_nod
 from primaite.transactions.transaction import Transaction

 _LOGGER = logging.getLogger(__name__)
+_LOGGER.setLevel(logging.INFO)


 class Primaite(Env):
@@ -205,15 +207,9 @@ class Primaite(Env):
            # [0, 4] - what property it's acting on (0 = nothing, state, SoftwareState, service state, file system state) # noqa
            # [0, 3] - action on property (0 = nothing, On / Scan, Off / Repair, Reset / Patch / Restore) # noqa
            # [0, num services] - resolves to service ID (0 = nothing, resolves to service) # noqa
-            self.action_space = spaces.MultiDiscrete(
-                [
-                    self.num_nodes,
-                    self.ACTION_SPACE_NODE_PROPERTY_VALUES,
-                    self.ACTION_SPACE_NODE_ACTION_VALUES,
-                    self.num_services,
-                ]
-            )
-        else:
+            self.action_dict = self.create_node_action_dict()
+            self.action_space = spaces.Discrete(len(self.action_dict))
+        elif self.action_type == ActionType.ACL:
            _LOGGER.info("Action space type ACL selected")
            # Terms (for ACL action space):
            # [0, 2] - Action (0 = do nothing, 1 = create rule, 2 = delete rule)
@@ -222,17 +218,14 @@ class Primaite(Env):
            # [0, num nodes] - Dest IP (0 = any, then 1 -> x resolving to IP addresses)
            # [0, num services] - Protocol (0 = any, then 1 -> x resolving to protocol)
            # [0, num ports] - Port (0 = any, then 1 -> x resolving to port)
-            self.action_space = spaces.MultiDiscrete(
-                [
-                    self.ACTION_SPACE_ACL_ACTION_VALUES,
-                    self.ACTION_SPACE_ACL_PERMISSION_VALUES,
-                    self.num_nodes + 1,
-                    self.num_nodes + 1,
-                    self.num_services + 1,
-                    self.num_ports + 1,
-                ]
-            )
-
+            self.action_dict = self.create_acl_action_dict()
+            self.action_space = spaces.Discrete(len(self.action_dict))
+        elif self.action_type == ActionType.ANY:
+            _LOGGER.info("Action space type ANY selected - Node + ACL")
+            self.action_dict = self.create_node_and_acl_action_dict()
+            self.action_space = spaces.Discrete(len(self.action_dict))
+        else:
+            _LOGGER.info("Invalid action type selected")
        # Set up a csv to store the results of the training
        try:
            now = datetime.now()  # current date and time
@@ -378,7 +371,7 @@ class Primaite(Env):
            self.step_count,
            self.config_values,
        )
-        # print(f"    Step {self.step_count} Reward: {str(reward)}")
+        print(f"    Step {self.step_count} Reward: {str(reward)}")
        self.total_reward += reward
        if self.step_count == self.episode_steps:
            self.average_reward = self.total_reward / self.step_count
@@ -435,8 +428,18 @@ class Primaite(Env):
        # At the moment, actions are only affecting nodes
        if self.action_type == ActionType.NODE:
            self.apply_actions_to_nodes(_action)
-        else:
+        elif self.action_type == ActionType.ACL:
            self.apply_actions_to_acl(_action)
+        elif (
+            len(self.action_dict[_action]) == 6
+        ):  # ACL actions in multidiscrete form have len 6
+            self.apply_actions_to_acl(_action)
+        elif (
+            len(self.action_dict[_action]) == 4
+        ):  # Node actions in multdiscrete (array) from have len 4
+            self.apply_actions_to_nodes(_action)
+        else:
+            logging.error("Invalid action type found")

    def apply_actions_to_nodes(self, _action):
        """
@@ -445,10 +448,11 @@ class Primaite(Env):
        Args:
            _action: The action space from the agent
        """
-        node_id = _action[0]
-        node_property = _action[1]
-        property_action = _action[2]
-        service_index = _action[3]
+        readable_action = self.action_dict[_action]
+        node_id = readable_action[0]
+        node_property = readable_action[1]
+        property_action = readable_action[2]
+        service_index = readable_action[3]

        # Check that the action is requesting a valid node
        try:
@@ -534,12 +538,15 @@ class Primaite(Env):
        Args:
            _action: The action space from the agent
        """
-        action_decision = _action[0]
-        action_permission = _action[1]
-        action_source_ip = _action[2]
-        action_destination_ip = _action[3]
-        action_protocol = _action[4]
-        action_port = _action[5]
+        # Convert discrete value back to multidiscrete
+        readable_action = self.action_dict[_action]
+
+        action_decision = readable_action[0]
+        action_permission = readable_action[1]
+        action_source_ip = readable_action[2]
+        action_destination_ip = readable_action[3]
+        action_protocol = readable_action[4]
+        action_port = readable_action[5]

        if action_decision == 0:
            # It's decided to do nothing
@@ -1119,3 +1126,91 @@ class Primaite(Env):
        else:
            # Bad formatting
            pass
+
+    def create_node_action_dict(self):
+        """
+        Creates a dictionary mapping each possible discrete action to more readable multidiscrete action.
+
+        Note: Only actions that have the potential to change the state exist in the mapping (except for key 0)
+
+        example return:
+        {0: [1, 0, 0, 0],
+        1: [1, 1, 1, 0],
+        2: [1, 1, 2, 0],
+        3: [1, 1, 3, 0],
+        4: [1, 2, 1, 0],
+        5: [1, 3, 1, 0],
+        ...
+        }
+
+        """
+        # reserve 0 action to be a nothing action
+        actions = {0: [1, 0, 0, 0]}
+        action_key = 1
+        for node in range(1, self.num_nodes + 1):
+            # 4 node properties (NONE, OPERATING, OS, SERVICE)
+            for node_property in range(4):
+                # Node Actions either:
+                # (NONE, ON, OFF, RESET) - operating state OR (NONE, PATCH) - OS/service state
+                # Use MAX to ensure we get them all
+                for node_action in range(4):
+                    for service_state in range(self.num_services):
+                        action = [node, node_property, node_action, service_state]
+                        # check to see if it's a nothing action (has no effect)
+                        if is_valid_node_action(action):
+                            actions[action_key] = action
+                            action_key += 1
+
+        return actions
+
+    def create_acl_action_dict(self):
+        """Creates a dictionary mapping each possible discrete action to more readable multidiscrete action."""
+        # reserve 0 action to be a nothing action
+        actions = {0: [0, 0, 0, 0, 0, 0]}
+
+        action_key = 1
+        # 3 possible action decisions, 0=NOTHING, 1=CREATE, 2=DELETE
+        for action_decision in range(3):
+            # 2 possible action permissions 0 = DENY, 1 = CREATE
+            for action_permission in range(2):
+                # Number of nodes + 1 (for any)
+                for source_ip in range(self.num_nodes + 1):
+                    for dest_ip in range(self.num_nodes + 1):
+                        for protocol in range(self.num_services + 1):
+                            for port in range(self.num_ports + 1):
+                                action = [
+                                    action_decision,
+                                    action_permission,
+                                    source_ip,
+                                    dest_ip,
+                                    protocol,
+                                    port,
+                                ]
+                                # Check to see if its an action we want to include as possible i.e. not a nothing action
+                                if is_valid_acl_action_extra(action):
+                                    actions[action_key] = action
+                                    action_key += 1
+
+        return actions
+
+    def create_node_and_acl_action_dict(self):
+        """
+        Create a dictionary mapping each possible discrete action to a more readable mutlidiscrete action.
+
+        The dictionary contains actions of both Node and ACL action types.
+
+        """
+        node_action_dict = self.create_node_action_dict()
+        acl_action_dict = self.create_acl_action_dict()
+
+        # Change node keys to not overlap with acl keys
+        # Only 1 nothing action (key 0) is required, remove the other
+        new_node_action_dict = {
+            k + len(acl_action_dict) - 1: v
+            for k, v in node_action_dict.items()
+            if k != 0
+        }
+
+        # Combine the Node dict and ACL dict
+        combined_action_dict = {**acl_action_dict, **new_node_action_dict}
+        return combined_action_dict
--- a/tests/config/single_action_space_fixed_blue_actions_main_config.yaml
+++ b/tests/config/single_action_space_fixed_blue_actions_main_config.yaml
@@ -0,0 +1,89 @@
+# Main Config File
+
+# Generic config values
+# Choose one of these (dependent on Agent being trained)
+# "STABLE_BASELINES3_PPO"
+# "STABLE_BASELINES3_A2C"
+# "GENERIC"
+agentIdentifier: GENERIC
+# Number of episodes to run per session
+numEpisodes: 1
+# Time delay between steps (for generic agents)
+timeDelay: 1
+# Filename of the scenario / laydown
+configFilename: single_action_space_lay_down_config.yaml
+# Type of session to be run (TRAINING or EVALUATION)
+sessionType: TRAINING
+# Determine whether to load an agent from file
+loadAgent: False
+# File path and file name of agent if you're loading one in
+agentLoadFile: C:\[Path]\[agent_saved_filename.zip]
+
+# Environment config values
+# The high value for the observation space
+observationSpaceHighValue: 1000000000
+
+# Reward values
+# Generic
+allOk: 0
+# Node Operating State
+offShouldBeOn: -10
+offShouldBeResetting: -5
+onShouldBeOff: -2
+onShouldBeResetting: -5
+resettingShouldBeOn: -5
+resettingShouldBeOff: -2
+resetting: -3
+# Node O/S or Service State
+goodShouldBePatching: 2
+goodShouldBeCompromised: 5
+goodShouldBeOverwhelmed: 5
+patchingShouldBeGood: -5
+patchingShouldBeCompromised: 2
+patchingShouldBeOverwhelmed: 2
+patching: -3
+compromisedShouldBeGood: -20
+compromisedShouldBePatching: -20
+compromisedShouldBeOverwhelmed: -20
+compromised: -20
+overwhelmedShouldBeGood: -20
+overwhelmedShouldBePatching: -20
+overwhelmedShouldBeCompromised: -20
+overwhelmed: -20
+# Node File System State
+goodShouldBeRepairing: 2
+goodShouldBeRestoring: 2
+goodShouldBeCorrupt: 5
+goodShouldBeDestroyed: 10
+repairingShouldBeGood: -5
+repairingShouldBeRestoring: 2
+repairingShouldBeCorrupt: 2
+repairingShouldBeDestroyed: 0
+repairing: -3
+restoringShouldBeGood: -10
+restoringShouldBeRepairing: -2
+restoringShouldBeCorrupt: 1
+restoringShouldBeDestroyed: 2
+restoring: -6
+corruptShouldBeGood: -10
+corruptShouldBeRepairing: -10
+corruptShouldBeRestoring: -10
+corruptShouldBeDestroyed: 2
+corrupt: -10
+destroyedShouldBeGood: -20
+destroyedShouldBeRepairing: -20
+destroyedShouldBeRestoring: -20
+destroyedShouldBeCorrupt: -20
+destroyed: -20
+scanning: -2
+# IER status
+redIerRunning: -5
+greenIerBlocked: -10
+
+# Patching / Reset durations
+osPatchingDuration: 5            # The time taken to patch the OS
+nodeResetDuration: 5             # The time taken to reset a node (hardware)
+servicePatchingDuration: 5       # The time taken to patch a service
+fileSystemRepairingLimit: 5      # The time take to repair the file system
+fileSystemRestoringLimit: 5      # The time take to restore the file system
+fileSystemScanningLimit: 5       # The time taken to scan the file system
--- a/tests/config/single_action_space_lay_down_config.yaml
+++ b/tests/config/single_action_space_lay_down_config.yaml
@@ -0,0 +1,55 @@
+- itemType: ACTIONS
+  type: ANY
+- itemType: STEPS
+  steps: 15
+- itemType: PORTS
+  portsList:
+  - port: '21'
+- itemType: SERVICES
+  serviceList:
+  - name: ftp
+- itemType: NODE
+  node_id: '1'
+  name: node
+  node_class: SERVICE
+  node_type: COMPUTER
+  priority: P1
+  hardware_state: 'ON'
+  ip_address: 192.168.0.14
+  software_state: GOOD
+  file_system_state: GOOD
+  services:
+  - name: ftp
+    port: '21'
+    state: COMPROMISED
+- itemType: NODE
+  node_id: '2'
+  name: server_1
+  node_class: SERVICE
+  node_type: SERVER
+  priority: P1
+  hardware_state: 'ON'
+  ip_address: 192.168.0.1
+  software_state: GOOD
+  file_system_state: GOOD
+  services:
+  - name: ftp
+    port: '21'
+    state: COMPROMISED
+- itemType: POSITION
+  positions:
+  - node: '1'
+    x_pos: 309
+    y_pos: 78
+  - node: '2'
+    x_pos: 200
+    y_pos: 78
+- itemType: RED_IER
+  id: '3'
+  startStep: 2
+  endStep: 15
+  load: 1000
+  protocol: ftp
+  port: CORRUPT
+  source: '1'
+  destination: '2'
--- a/tests/config/single_action_space_main_config.yaml
+++ b/tests/config/single_action_space_main_config.yaml
@@ -0,0 +1,89 @@
+# Main Config File
+
+# Generic config values
+# Choose one of these (dependent on Agent being trained)
+# "STABLE_BASELINES3_PPO"
+# "STABLE_BASELINES3_A2C"
+# "GENERIC"
+agentIdentifier: GENERIC
+# Number of episodes to run per session
+numEpisodes: 1
+# Time delay between steps (for generic agents)
+timeDelay: 1
+# Filename of the scenario / laydown
+configFilename: single_action_space_lay_down_config.yaml
+# Type of session to be run (TRAINING or EVALUATION)
+sessionType: TRAINING
+# Determine whether to load an agent from file
+loadAgent: False
+# File path and file name of agent if you're loading one in
+agentLoadFile: C:\[Path]\[agent_saved_filename.zip]
+
+# Environment config values
+# The high value for the observation space
+observationSpaceHighValue: 1000000000
+
+# Reward values
+# Generic
+allOk: 0
+# Node Operating State
+offShouldBeOn: -10
+offShouldBeResetting: -5
+onShouldBeOff: -2
+onShouldBeResetting: -5
+resettingShouldBeOn: -5
+resettingShouldBeOff: -2
+resetting: -3
+# Node O/S or Service State
+goodShouldBePatching: 2
+goodShouldBeCompromised: 5
+goodShouldBeOverwhelmed: 5
+patchingShouldBeGood: -5
+patchingShouldBeCompromised: 2
+patchingShouldBeOverwhelmed: 2
+patching: -3
+compromisedShouldBeGood: -20
+compromisedShouldBePatching: -20
+compromisedShouldBeOverwhelmed: -20
+compromised: -20
+overwhelmedShouldBeGood: -20
+overwhelmedShouldBePatching: -20
+overwhelmedShouldBeCompromised: -20
+overwhelmed: -20
+# Node File System State
+goodShouldBeRepairing: 2
+goodShouldBeRestoring: 2
+goodShouldBeCorrupt: 5
+goodShouldBeDestroyed: 10
+repairingShouldBeGood: -5
+repairingShouldBeRestoring: 2
+repairingShouldBeCorrupt: 2
+repairingShouldBeDestroyed: 0
+repairing: -3
+restoringShouldBeGood: -10
+restoringShouldBeRepairing: -2
+restoringShouldBeCorrupt: 1
+restoringShouldBeDestroyed: 2
+restoring: -6
+corruptShouldBeGood: -10
+corruptShouldBeRepairing: -10
+corruptShouldBeRestoring: -10
+corruptShouldBeDestroyed: 2
+corrupt: -10
+destroyedShouldBeGood: -20
+destroyedShouldBeRepairing: -20
+destroyedShouldBeRestoring: -20
+destroyedShouldBeCorrupt: -20
+destroyed: -20
+scanning: -2
+# IER status
+redIerRunning: -5
+greenIerBlocked: -10
+
+# Patching / Reset durations
+osPatchingDuration: 5            # The time taken to patch the OS
+nodeResetDuration: 5             # The time taken to reset a node (hardware)
+servicePatchingDuration: 5       # The time taken to patch a service
+fileSystemRepairingLimit: 5      # The time take to repair the file system
+fileSystemRestoringLimit: 5      # The time take to restore the file system
+fileSystemScanningLimit: 5       # The time taken to scan the file system
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -168,12 +168,13 @@ def _get_primaite_env_from_config(
    # Load in config data
    load_config_values()
    env = Primaite(config_values, [])
+    # Get the number of steps (which is stored in the child config file)
    config_values.num_steps = env.episode_steps

    if env.config_values.agent_identifier == "GENERIC":
        run_generic(env, config_values)

-    return env
+    return env, config_values


 def run_generic(env, config_values):
@@ -185,7 +186,8 @@ def run_generic(env, config_values):
            # Send the observation space to the agent to get an action
            # TEMP - random action for now
            # action = env.blue_agent_action(obs)
-            action = env.action_space.sample()
+            # action = env.action_space.sample()
+            action = 0

            # Run the simulation step on the live environment
            obs, reward, done, info = env.step(action)
--- a/tests/test_reward.py
+++ b/tests/test_reward.py
@@ -8,7 +8,7 @@ def test_rewards_are_being_penalised_at_each_step_function():

    When the initial state is OFF compared to reference state which is ON.
    """
-    env = _get_primaite_env_from_config(
+    env, config_values = _get_primaite_env_from_config(
        main_config_path=TEST_CONFIG_ROOT / "one_node_states_on_off_main_config.yaml",
        lay_down_config_path=TEST_CONFIG_ROOT
        / "one_node_states_on_off_lay_down_config.yaml",
--- a/tests/test_single_action_space.py
+++ b/tests/test_single_action_space.py
@@ -0,0 +1,100 @@
+import time
+
+from primaite.common.enums import HardwareState
+from tests import TEST_CONFIG_ROOT
+from tests.conftest import _get_primaite_env_from_config
+
+
+def run_generic_set_actions(env, config_values):
+    """Run against a generic agent with specified blue agent actions."""
+    # Reset the environment at the start of the episode
+    # env.reset()
+    for episode in range(0, config_values.num_episodes):
+        for step in range(0, config_values.num_steps):
+            # Send the observation space to the agent to get an action
+            # TEMP - random action for now
+            # action = env.blue_agent_action(obs)
+            action = 0
+            print("Episode:", episode, "\nStep:", step)
+            if step == 5:
+                # [1, 1, 2, 1, 1, 1]
+                # Creates an ACL rule
+                # Allows traffic from server_1 to node_1 on port FTP
+                action = 7
+            elif step == 7:
+                # [1, 1, 2, 0] Node Action
+                # Sets Node 1 Hardware State to OFF
+                # Does not resolve any service
+                action = 16
+            # Run the simulation step on the live environment
+            obs, reward, done, info = env.step(action)
+
+            # Break if done is True
+            if done:
+                break
+
+            # Introduce a delay between steps
+            time.sleep(config_values.time_delay / 1000)
+
+        # Reset the environment at the end of the episode
+        # env.reset()
+
+    # env.close()
+
+
+def test_single_action_space_is_valid():
+    """Test to ensure the blue agent is using the ACL action space and is carrying out both kinds of operations."""
+    env, config_values = _get_primaite_env_from_config(
+        main_config_path=TEST_CONFIG_ROOT / "single_action_space_main_config.yaml",
+        lay_down_config_path=TEST_CONFIG_ROOT
+        / "single_action_space_lay_down_config.yaml",
+    )
+
+    run_generic_set_actions(env, config_values)
+
+    # Retrieve the action space dictionary values from environment
+    env_action_space_dict = env.action_dict.values()
+    # Flags to check the conditions of the action space
+    contains_acl_actions = False
+    contains_node_actions = False
+    both_action_spaces = False
+    # Loop through each element of the list (which is every value from the dictionary)
+    for dict_item in env_action_space_dict:
+        # Node action detected
+        if len(dict_item) == 4:
+            contains_node_actions = True
+        # Link action detected
+        elif len(dict_item) == 6:
+            contains_acl_actions = True
+    # If both are there then the ANY action type is working
+    if contains_node_actions and contains_acl_actions:
+        both_action_spaces = True
+    # Check condition should be True
+    assert both_action_spaces
+
+
+def test_agent_is_executing_actions_from_both_spaces():
+    """Test to ensure the blue agent is carrying out both kinds of operations (NODE & ACL)."""
+    env, config_values = _get_primaite_env_from_config(
+        main_config_path=TEST_CONFIG_ROOT
+        / "single_action_space_fixed_blue_actions_main_config.yaml",
+        lay_down_config_path=TEST_CONFIG_ROOT
+        / "single_action_space_lay_down_config.yaml",
+    )
+    # Run environment with specified fixed blue agent actions only
+    run_generic_set_actions(env, config_values)
+    # Retrieve hardware state of computer_1 node in laydown config
+    # Agent turned this off in Step 5
+    computer_node_hardware_state = env.nodes["1"].hardware_state
+    # Retrieve the Access Control List object stored by the environment at the end of the episode
+    access_control_list = env.acl
+    # Use the Access Control List object acl object attribute to get dictionary
+    # Use dictionary.values() to get total list of all items in the dictionary
+    acl_rules_list = access_control_list.acl.values()
+    # Length of this list tells you how many items are in the dictionary
+    # This number is the frequency of Access Control Rules in the environment
+    # In the scenario, we specified that the agent should create only 1 acl rule
+    num_of_rules = len(acl_rules_list)
+    # Therefore these statements below MUST be true
+    assert computer_node_hardware_state == HardwareState.OFF
+    assert num_of_rules == 1