from datetime import datetime from primaite.config.lay_down_config import data_manipulation_config_path from primaite.environment.primaite_env import Primaite from tests import TEST_CONFIG_ROOT from tests.conftest import _get_temp_session_path def run_generic(env, config_values): """Run against a generic agent.""" # Reset the environment at the start of the episode env.reset() for episode in range(0, config_values.num_episodes): for step in range(0, config_values.num_steps): # Send the observation space to the agent to get an action # TEMP - random action for now # action = env.blue_agent_action(obs) # action = env.action_space.sample() action = 0 # Run the simulation step on the live environment obs, reward, done, info = env.step(action) # Break if done is True if done: break # Reset the environment at the end of the episode env.reset() env.close() def test_random_red_agent_behaviour(): """ Test that hardware state is penalised at each step. When the initial state is OFF compared to reference state which is ON. """ list_of_node_instructions = [] # RUN TWICE so we can make sure that red agent is randomised for i in range(2): """Takes a config path and returns the created instance of Primaite.""" session_timestamp: datetime = datetime.now() session_path = _get_temp_session_path(session_timestamp) timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S") env = Primaite( training_config_path=TEST_CONFIG_ROOT / "one_node_states_on_off_main_config.yaml", lay_down_config_path=data_manipulation_config_path(), transaction_list=[], session_path=session_path, timestamp_str=timestamp_str, ) training_config = env.training_config training_config.num_steps = env.episode_steps run_generic(env, training_config) # add red pol instructions to list list_of_node_instructions.append(env.red_node_pol) # compare instructions to make sure that red instructions are truly random for index, instruction in enumerate(list_of_node_instructions): for key in list_of_node_instructions[index].keys(): instruction: NodeInstructionRed = list_of_node_instructions[index][key] print(f"run {index}") print(f"{key} start step: {instruction.get_start_step()}") print(f"{key} end step: {instruction.get_end_step()}") print(f"{key} target node id: {instruction.get_target_node_id()}") print("") assert list_of_node_instructions[0].__ne__(list_of_node_instructions[1])