PrimAITE/tests/test_red_random_agent_behaviour.py

from datetime import datetime

from primaite.config.lay_down_config import data_manipulation_config_path
from primaite.environment.primaite_env import Primaite
from tests import TEST_CONFIG_ROOT
from tests.conftest import _get_temp_session_path


def run_generic(env, config_values):
    """Run against a generic agent."""
    # Reset the environment at the start of the episode
    env.reset()
    for episode in range(0, config_values.num_episodes):
        for step in range(0, config_values.num_steps):
            # Send the observation space to the agent to get an action
            # TEMP - random action for now
            # action = env.blue_agent_action(obs)
            # action = env.action_space.sample()
            action = 0

            # Run the simulation step on the live environment
            obs, reward, done, info = env.step(action)

            # Break if done is True
            if done:
                break

        # Reset the environment at the end of the episode
        env.reset()

    env.close()


def test_random_red_agent_behaviour():
    """
    Test that hardware state is penalised at each step.

    When the initial state is OFF compared to reference state which is ON.
    """
    list_of_node_instructions = []

    # RUN TWICE so we can make sure that red agent is randomised
    for i in range(2):

        """Takes a config path and returns the created instance of Primaite."""
        session_timestamp: datetime = datetime.now()
        session_path = _get_temp_session_path(session_timestamp)

        timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
        env = Primaite(
            training_config_path=TEST_CONFIG_ROOT / "one_node_states_on_off_main_config.yaml",
            lay_down_config_path=data_manipulation_config_path(),
            transaction_list=[],
            session_path=session_path,
            timestamp_str=timestamp_str,
        )
        training_config = env.training_config
        training_config.num_steps = env.episode_steps

        run_generic(env, training_config)
        # add red pol instructions to list
        list_of_node_instructions.append(env.red_node_pol)

    # compare instructions to make sure that red instructions are truly random
    for index, instruction in enumerate(list_of_node_instructions):
        for key in list_of_node_instructions[index].keys():
            instruction: NodeInstructionRed = list_of_node_instructions[index][key]
            print(f"run {index}")
            print(f"{key} start step: {instruction.get_start_step()}")
            print(f"{key} end step: {instruction.get_end_step()}")
            print(f"{key} target node id: {instruction.get_target_node_id()}")
            print("")

    assert list_of_node_instructions[0].__ne__(list_of_node_instructions[1])
#1522: added a check for existing links in laydown + test that checks if red agent instructions are random 2023-07-03 09:46:52 +01:00			`from datetime import datetime`
feature\1522: Create random red agent behaviour. 2023-06-22 15:34:13 +01:00
#1522: added a check for existing links in laydown + test that checks if red agent instructions are random 2023-07-03 09:46:52 +01:00			`from primaite.config.lay_down_config import data_manipulation_config_path`
feature\1522: Create random red agent behaviour. 2023-06-22 15:34:13 +01:00			`from primaite.environment.primaite_env import Primaite`
			`from tests import TEST_CONFIG_ROOT`
			`from tests.conftest import _get_temp_session_path`


			`def run_generic(env, config_values):`
			`"""Run against a generic agent."""`
			`# Reset the environment at the start of the episode`
			`env.reset()`
			`for episode in range(0, config_values.num_episodes):`
			`for step in range(0, config_values.num_steps):`
			`# Send the observation space to the agent to get an action`
			`# TEMP - random action for now`
			`# action = env.blue_agent_action(obs)`
			`# action = env.action_space.sample()`
			`action = 0`

			`# Run the simulation step on the live environment`
			`obs, reward, done, info = env.step(action)`

			`# Break if done is True`
			`if done:`
			`break`

			`# Reset the environment at the end of the episode`
			`env.reset()`

			`env.close()`


			`def test_random_red_agent_behaviour():`
			`"""`
			`Test that hardware state is penalised at each step.`

			`When the initial state is OFF compared to reference state which is ON.`
			`"""`
			`list_of_node_instructions = []`
#1522: added a check for existing links in laydown + test that checks if red agent instructions are random 2023-07-03 09:46:52 +01:00
			`# RUN TWICE so we can make sure that red agent is randomised`
feature\1522: Create random red agent behaviour. 2023-06-22 15:34:13 +01:00			`for i in range(2):`

			`"""Takes a config path and returns the created instance of Primaite."""`
			`session_timestamp: datetime = datetime.now()`
			`session_path = _get_temp_session_path(session_timestamp)`

			`timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")`
			`env = Primaite(`
			`training_config_path=TEST_CONFIG_ROOT / "one_node_states_on_off_main_config.yaml",`
#1522: added a check for existing links in laydown + test that checks if red agent instructions are random 2023-07-03 09:46:52 +01:00			`lay_down_config_path=data_manipulation_config_path(),`
feature\1522: Create random red agent behaviour. 2023-06-22 15:34:13 +01:00			`transaction_list=[],`
			`session_path=session_path,`
			`timestamp_str=timestamp_str,`
			`)`
			`training_config = env.training_config`
			`training_config.num_steps = env.episode_steps`

#1522: added a check for existing links in laydown + test that checks if red agent instructions are random 2023-07-03 09:46:52 +01:00			`run_generic(env, training_config)`
			`# add red pol instructions to list`
			`list_of_node_instructions.append(env.red_node_pol)`

			`# compare instructions to make sure that red instructions are truly random`
			`for index, instruction in enumerate(list_of_node_instructions):`
			`for key in list_of_node_instructions[index].keys():`
			`instruction: NodeInstructionRed = list_of_node_instructions[index][key]`
			`print(f"run {index}")`
			`print(f"{key} start step: {instruction.get_start_step()}")`
			`print(f"{key} end step: {instruction.get_end_step()}")`
			`print(f"{key} target node id: {instruction.get_target_node_id()}")`
			`print("")`

feature\1522: Create random red agent behaviour. 2023-06-22 15:34:13 +01:00			`assert list_of_node_instructions[0].__ne__(list_of_node_instructions[1])`