Merge branch 'dev' into feature/1386-enable-a-repeatable-or-deterministic-baseline-test

Czar Echavez
2023-07-03 16:56:44 +01:00
16 changed files with 527 additions and 79 deletions


@@ -0,0 +1,77 @@
from datetime import datetime

from primaite.config.lay_down_config import data_manipulation_config_path
from primaite.environment.primaite_env import Primaite
from primaite.nodes.node_state_instruction_red import NodeStateInstructionRed
from tests import TEST_CONFIG_ROOT
from tests.conftest import _get_temp_session_path


def run_generic(env, config_values):
    """Run against a generic agent."""
    # Reset the environment at the start of the episode
    env.reset()
    for episode in range(config_values.num_episodes):
        for step in range(config_values.num_steps):
            # Send the observation space to the agent to get an action
            # TEMP - random action for now
            # action = env.blue_agent_action(obs)
            # action = env.action_space.sample()
            action = 0
            # Run the simulation step on the live environment
            obs, reward, done, info = env.step(action)
            # Break if done is True
            if done:
                break
        # Reset the environment at the end of the episode
        env.reset()
    env.close()


def test_random_red_agent_behaviour():
    """
    Test that the red agent's pattern of life is randomised between runs.

    The environment is built twice with random_red_agent enabled, and the red
    node instructions produced by each run are collected and compared.
    """
    list_of_node_instructions = []
    # Run twice so we can make sure that the red agent is randomised
    for i in range(2):
        # Build a fresh Primaite instance from the test config paths
        session_timestamp: datetime = datetime.now()
        session_path = _get_temp_session_path(session_timestamp)
        timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
        env = Primaite(
            training_config_path=TEST_CONFIG_ROOT
            / "one_node_states_on_off_main_config.yaml",
            lay_down_config_path=data_manipulation_config_path(),
            transaction_list=[],
            session_path=session_path,
            timestamp_str=timestamp_str,
        )
        # Enable the randomised red agent for this run
        env.training_config.random_red_agent = True
        training_config = env.training_config
        training_config.num_steps = env.episode_steps
        run_generic(env, training_config)
        # Add this run's red PoL instructions to the list
        list_of_node_instructions.append(env.red_node_pol)
    # Print the instructions from both runs so any differences are visible in the test log
    for index, instructions in enumerate(list_of_node_instructions):
        for key in instructions:
            instruction: NodeStateInstructionRed = instructions[key]
            print(f"run {index}")
            print(f"{key} start step: {instruction.get_start_step()}")
            print(f"{key} end step: {instruction.get_end_step()}")
            print(f"{key} target node id: {instruction.get_target_node_id()}")
            print("")
    # Compare with != rather than calling __ne__ directly: __ne__ can return
    # NotImplemented, which is truthy and would make the assert pass spuriously
    assert list_of_node_instructions[0] != list_of_node_instructions[1]
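
Aside: the final assert relies on dict inequality, which in turn relies on NodeStateInstructionRed comparing by value. If the class falls back to Python's default identity comparison, two separately built instruction dicts always compare unequal and the assert can never fail. A stricter check can be sketched from the getters the test already prints; instruction_fingerprint below is a hypothetical helper, not part of PrimAITE:

def instruction_fingerprint(instructions):
    """Reduce a dict of NodeStateInstructionRed to plain, comparable tuples."""
    return {
        key: (
            instr.get_start_step(),
            instr.get_end_step(),
            instr.get_target_node_id(),
        )
        for key, instr in instructions.items()
    }

assert instruction_fingerprint(
    list_of_node_instructions[0]
) != instruction_fingerprint(list_of_node_instructions[1])

This compares the values that actually define the red agent's behaviour, so the test fails if two runs happen to produce identical instructions regardless of how the instruction class implements equality.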


@@ -16,17 +16,26 @@ def test_rewards_are_being_penalised_at_each_step_function():
    )
    """
-    On different steps (of the 13 in total) these are the following rewards for config_6 which are activated:
-    File System State: goodShouldBeCorrupt = 5 (between Steps 1 & 3)
-    Hardware State: onShouldBeOff = -2 (between Steps 4 & 6)
-    Service State: goodShouldBeCompromised = 5 (between Steps 7 & 9)
-    Software State: goodShouldBeCompromised = 5 (between Steps 10 & 12)
+    The config 'one_node_states_on_off_lay_down_config.yaml' has 15 steps.
+    On different steps, the laydown config has Patterns of Life (PoLs) which change the state of one of the
+    node's attributes, for example turning the node's file system state to CORRUPT from its original state GOOD.
+    As a result, the following rewards are activated:
+    File System State: corrupt_should_be_good = -10 * 2 (on Steps 1 & 2)
+    Hardware State: off_should_be_on = -10 * 2 (on Steps 4 & 5)
+    Service State: compromised_should_be_good = -20 * 2 (on Steps 7 & 8)
+    Software State: compromised_should_be_good = -20 * 2 (on Steps 10 & 11)
-    Total Reward: -2 - 2 + 5 + 5 + 5 + 5 + 5 + 5 = 26
-    Step Count: 13
-    For the 4 steps where this occurs the average reward is:
-    Average Reward: 2 (26 / 13)
+    Each PoL lasts for 2 steps, so the agent is penalised twice per state change.
+    Note: This test inherits from conftest.py, where the PrimAITE environment is run and the blue agent is
+    hard-coded to do NOTHING on every step.
+    The PoLs change the node's states and show that the agent is penalised on every step where the live
+    network node differs from the reference network node.
+    Total Reward: -10 + -10 + -10 + -10 + -20 + -20 + -20 + -20 = -120
+    Step Count: 15
+    For the 4 state changes above, averaged over all 15 steps:
+    Average Reward: -8 (-120 / 15)
    """
    print("average reward", env.average_reward)
    assert env.average_reward == -8.0
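
As a quick sanity check of the docstring's arithmetic, a few lines of plain Python, independent of the PrimAITE test fixtures; the penalty values and step counts are taken from the updated docstring above, and the variable names are illustrative:

# Per-step penalties: file system, hardware, service, software
penalties = [-10, -10, -20, -20]
pol_duration = 2  # each PoL lasts 2 steps, so each penalty is applied twice
num_steps = 15

total_reward = sum(p * pol_duration for p in penalties)
assert total_reward == -120
assert total_reward / num_steps == -8.0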