75 lines
2.8 KiB
Python
75 lines
2.8 KiB
Python
from datetime import datetime
|
|
|
|
from primaite.config.lay_down_config import data_manipulation_config_path
|
|
from primaite.environment.primaite_env import Primaite
|
|
from tests import TEST_CONFIG_ROOT
|
|
from tests.conftest import _get_temp_session_path
|
|
|
|
|
|
def run_generic(env, config_values):
|
|
"""Run against a generic agent."""
|
|
# Reset the environment at the start of the episode
|
|
env.reset()
|
|
for episode in range(0, config_values.num_episodes):
|
|
for step in range(0, config_values.num_steps):
|
|
# Send the observation space to the agent to get an action
|
|
# TEMP - random action for now
|
|
# action = env.blue_agent_action(obs)
|
|
# action = env.action_space.sample()
|
|
action = 0
|
|
|
|
# Run the simulation step on the live environment
|
|
obs, reward, done, info = env.step(action)
|
|
|
|
# Break if done is True
|
|
if done:
|
|
break
|
|
|
|
# Reset the environment at the end of the episode
|
|
env.reset()
|
|
|
|
env.close()
|
|
|
|
|
|
def test_random_red_agent_behaviour():
|
|
"""
|
|
Test that hardware state is penalised at each step.
|
|
|
|
When the initial state is OFF compared to reference state which is ON.
|
|
"""
|
|
list_of_node_instructions = []
|
|
|
|
# RUN TWICE so we can make sure that red agent is randomised
|
|
for i in range(2):
|
|
|
|
"""Takes a config path and returns the created instance of Primaite."""
|
|
session_timestamp: datetime = datetime.now()
|
|
session_path = _get_temp_session_path(session_timestamp)
|
|
|
|
timestamp_str = session_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
|
|
env = Primaite(
|
|
training_config_path=TEST_CONFIG_ROOT / "one_node_states_on_off_main_config.yaml",
|
|
lay_down_config_path=data_manipulation_config_path(),
|
|
transaction_list=[],
|
|
session_path=session_path,
|
|
timestamp_str=timestamp_str,
|
|
)
|
|
training_config = env.training_config
|
|
training_config.num_steps = env.episode_steps
|
|
|
|
run_generic(env, training_config)
|
|
# add red pol instructions to list
|
|
list_of_node_instructions.append(env.red_node_pol)
|
|
|
|
# compare instructions to make sure that red instructions are truly random
|
|
for index, instruction in enumerate(list_of_node_instructions):
|
|
for key in list_of_node_instructions[index].keys():
|
|
instruction: NodeInstructionRed = list_of_node_instructions[index][key]
|
|
print(f"run {index}")
|
|
print(f"{key} start step: {instruction.get_start_step()}")
|
|
print(f"{key} end step: {instruction.get_end_step()}")
|
|
print(f"{key} target node id: {instruction.get_target_node_id()}")
|
|
print("")
|
|
|
|
assert list_of_node_instructions[0].__ne__(list_of_node_instructions[1])
|