PrimAITE/tests/integration_tests/game_layer/test_rewards.py

# © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK
import pytest
import yaml

from primaite.game.agent.interface import AgentHistoryItem
from primaite.game.agent.rewards import ActionPenalty, GreenAdminDatabaseUnreachablePenalty, WebpageUnavailablePenalty
from primaite.game.game import PrimaiteGame
from primaite.interface.request import RequestResponse
from primaite.session.environment import PrimaiteGymEnv
from primaite.simulator.network.hardware.nodes.host.server import Server
from primaite.simulator.network.hardware.nodes.network.router import ACLAction, Router
from primaite.simulator.system.applications.database_client import DatabaseClient
from primaite.simulator.system.applications.web_browser import WebBrowser
from primaite.simulator.system.services.database.database_service import DatabaseService
from primaite.utils.validation.ip_protocol import PROTOCOL_LOOKUP
from primaite.utils.validation.port import PORT_LOOKUP
from tests import TEST_ASSETS_ROOT
from tests.conftest import ControlledAgent


def test_WebpageUnavailablePenalty(game_and_agent: tuple[PrimaiteGame, ControlledAgent]):
    """Check the WebpageUnavailablePenalty reward for no fetch, a successful fetch and a blocked fetch."""
    game, agent = game_and_agent
    agent: ControlledAgent

    def reward_after(action):
        """Store ``action`` on the agent, advance the game one step and return the agent's current reward."""
        agent.store_action(action)
        game.step()
        return agent.reward_function.current_reward

    def browser_execute():
        """Build a fresh action that executes the web browser application on client_1."""
        return ("node-application-execute", {"node_name": "client_1", "application_name": "web-browser"})

    # Reward component watching client_1, configured with sticky=True
    penalty = WebpageUnavailablePenalty(
        config=WebpageUnavailablePenalty.ConfigSchema(node_hostname="client_1", sticky=True)
    )

    # Start the browser on client_1 and point it at the url to fetch
    web_host = game.simulation.network.get_node_by_hostname("client_1")
    web_browser: WebBrowser = web_host.software_manager.software.get("web-browser")
    web_browser.run()
    web_browser.config.target_url = "http://www.example.com"

    # Register the component on the agent with a weight of 0.7
    agent.reward_function.register_component(penalty, 0.7)

    # No fetch has been attempted yet, so the reward should be 0.0
    assert reward_after(("do-nothing", {})) == 0.0

    # A successful fetch should give a reward of 0.7
    assert reward_after(browser_execute()) == 0.7

    # Deny HTTP traffic at the router; the next fetch should fail and give a reward of -0.7
    gateway: Router = game.simulation.network.get_node_by_hostname("router")
    gateway.acl.add_rule(
        action=ACLAction.DENY,
        protocol=PROTOCOL_LOOKUP["TCP"],
        src_port=PORT_LOOKUP["HTTP"],
        dst_port=PORT_LOOKUP["HTTP"],
    )
    assert reward_after(browser_execute()) == -0.7


def test_uc2_rewards(game_and_agent: tuple[PrimaiteGame, ControlledAgent]):
    """
    Check GreenAdminDatabaseUnreachablePenalty against a reachable and an unreachable database.

    With the router permitting postgres traffic the component should return 1.0 and record a
    "success" status; once that ACL rule is removed it should return -1.0 and record "failure".
    """
    game, agent = game_and_agent
    agent: ControlledAgent
    network = game.simulation.network

    # Install and start the database service on server_1
    db_server: Server = network.get_node_by_hostname("server_1")
    db_server.software_manager.install(DatabaseService)
    database_service = db_server.software_manager.software.get("database-service")
    database_service.start()

    # Install the database client on client_1 and aim it at server_1
    green_host = network.get_node_by_hostname("client_1")
    green_host.software_manager.install(DatabaseClient)
    database_client: DatabaseClient = green_host.software_manager.software.get("database-client")
    database_client.configure(server_ip_address=db_server.network_interface[1].ip_address)
    database_client.run()

    # Permit postgres traffic at ACL position 2; this rule is removed later in the test
    gateway: Router = network.get_node_by_hostname("router")
    postgres_port = PORT_LOOKUP["POSTGRES_SERVER"]
    gateway.acl.add_rule(ACLAction.PERMIT, src_port=postgres_port, dst_port=postgres_port, position=2)

    reward_component = GreenAdminDatabaseUnreachablePenalty(
        config=GreenAdminDatabaseUnreachablePenalty.ConfigSchema(node_hostname="client_1", sticky=True)
    )

    execute_request = ["network", "node", "client_1", "application", "database-client", "execute"]

    def attempt_connection():
        """Apply the execute request, then score it; returns (reward value, history item)."""
        outcome = game.simulation.apply_request(execute_request)
        sim_state = game.get_sim_state()
        history_item = AgentHistoryItem(
            timestep=0,
            action="node-application-execute",
            parameters={},
            request=execute_request,
            response=outcome,
        )
        return reward_component.calculate(sim_state, last_action_response=history_item), history_item

    # Traffic permitted: expect the positive reward and a recorded success
    reward_value, history_item = attempt_connection()
    assert reward_value == 1.0
    assert history_item.reward_info == {"connection_attempt_status": "success"}

    # Take the permit rule away again
    gateway.acl.remove_rule(position=2)

    # Rule removed: expect the negative reward and a recorded failure
    reward_value, history_item = attempt_connection()
    assert reward_value == -1.0
    assert history_item.reward_info == {"connection_attempt_status": "failure"}


def test_shared_reward():
    """
    Check the defender's reward against the two green users' rewards in the shared rewards config.

    The defender must come after both green users in the reward calculation order, and on every
    step its current reward must equal the sum of the two green users' current rewards.
    """
    config_file = TEST_ASSETS_ROOT / "configs/shared_rewards.yaml"
    with open(config_file, "r") as stream:
        env_config = yaml.safe_load(stream)

    gym_env = PrimaiteGymEnv(env_config=env_config)
    gym_env.reset()

    # The defender has to appear later in the calculation order than each green user
    calc_order = gym_env.game._reward_calculation_order
    for green_name in ("client_1_green_user", "client_2_green_user"):
        assert calc_order.index("defender") > calc_order.index(green_name)

    # Drive the environment with sampled actions and compare rewards after every step
    for _ in range(256):
        gym_env.step(gym_env.action_space.sample())
        agents = gym_env.game.agents
        green_total = (
            agents["client_1_green_user"].reward_function.current_reward
            + agents["client_2_green_user"].reward_function.current_reward
        )
        assert agents["defender"].reward_function.current_reward == green_total


def test_action_penalty_loads_from_config():
    """Ensure the ActionPenalty component and its configured values are loaded from config into PrimaiteGymEnv."""
    config_file = TEST_ASSETS_ROOT / "configs/action_penalty.yaml"
    with open(config_file, "r") as stream:
        env_config = yaml.safe_load(stream)

    gym_env = PrimaiteGymEnv(env_config=env_config)
    gym_env.reset()

    blue_agent = gym_env.game.agents["defender"]

    # Element 0 of each reward_components entry is the component object
    penalties = [
        entry[0]
        for entry in blue_agent.reward_function.reward_components
        if isinstance(entry[0], ActionPenalty)
    ]
    if not penalties:
        pytest.fail("Action penalty reward component was not added to the agent from config.")

    # If more than one ActionPenalty is registered, the last one is the one checked
    action_penalty = penalties[-1]
    assert action_penalty.config.action_penalty == -0.75
    assert action_penalty.config.do_nothing_penalty == 0.125


def test_action_penalty():
    """
    Test that ActionPenalty returns the configured value for each kind of action.

    A non-do-nothing action should produce ``action_penalty`` and a do-nothing action should
    produce ``do_nothing_penalty``.
    """
    # Build an ActionPenalty reward component from its config schema
    schema = ActionPenalty.ConfigSchema(action_penalty=-0.75, do_nothing_penalty=0.125)
    penalty = ActionPenalty(config=schema)

    # An action other than do-nothing should yield the configured action_penalty
    reward_value = penalty.calculate(
        state={},
        last_action_response=AgentHistoryItem(
            timestep=0,
            action="node-application-execute",
            parameters={"node_name": "client", "application_name": "web-browser"},
            request=["execute"],
            response=RequestResponse.from_bool(True),
        ),
    )

    assert reward_value == -0.75

    # A do-nothing action should yield the configured do_nothing_penalty
    reward_value = penalty.calculate(
        state={},
        last_action_response=AgentHistoryItem(
            timestep=0,
            action="do-nothing",
            parameters={},
            request=["do-nothing"],
            response=RequestResponse.from_bool(True),
        ),
    )

    assert reward_value == 0.125


def test_action_penalty_e2e(game_and_agent: tuple[PrimaiteGame, ControlledAgent]):
    """Test that a registered ActionPenalty gives the expected agent reward after each game step."""
    game, agent = game_and_agent
    agent: ControlledAgent

    penalty_config = ActionPenalty.ConfigSchema(action_penalty=-0.75, do_nothing_penalty=0.125)
    agent.reward_function.register_component(ActionPenalty(config=penalty_config), 1.0)

    # (action, expected reward) pairs, played in this order
    scenarios = [
        (("do-nothing", {}), 0.125),
        (
            ("node-file-scan", {"node_name": "client", "folder_name": "downloads", "file_name": "document.pdf"}),
            -0.75,
        ),
    ]
    for action, expected_reward in scenarios:
        agent.store_action(action)
        game.step()
        assert agent.reward_function.current_reward == expected_reward
Change copyright to 2025 2025-01-02 15:05:06 +00:00			`# © Crown-owned copyright 2025, Defence Science and Technology Laboratory UK`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`import pytest`
Add shared reward test 2024-03-13 14:01:17 +00:00			`import yaml`

fix reward logging 2024-05-31 15:00:18 +01:00			`from primaite.game.agent.interface import AgentHistoryItem`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00			`from primaite.game.agent.rewards import ActionPenalty, GreenAdminDatabaseUnreachablePenalty, WebpageUnavailablePenalty`
Add shared reward test 2024-03-13 14:01:17 +00:00			`from primaite.game.game import PrimaiteGame`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`from primaite.interface.request import RequestResponse`
Add shared reward test 2024-03-13 14:01:17 +00:00			`from primaite.session.environment import PrimaiteGymEnv`
Add test for new reward 2024-03-03 15:52:34 +00:00			`from primaite.simulator.network.hardware.nodes.host.server import Server`
#2248 - synced wth dev 2024-02-08 16:15:57 +00:00			`from primaite.simulator.network.hardware.nodes.network.router import ACLAction, Router`
Add test for new reward 2024-03-03 15:52:34 +00:00			`from primaite.simulator.system.applications.database_client import DatabaseClient`
#2736 - Fix up broken reward tests 2024-08-19 13:59:35 +01:00			`from primaite.simulator.system.applications.web_browser import WebBrowser`
Add test for new reward 2024-03-03 15:52:34 +00:00			`from primaite.simulator.system.services.database.database_service import DatabaseService`
Change port and protocol to annotated validators 2024-09-25 16:28:22 +01:00			`from primaite.utils.validation.ip_protocol import PROTOCOL_LOOKUP`
			`from primaite.utils.validation.port import PORT_LOOKUP`
Add shared reward test 2024-03-13 14:01:17 +00:00			`from tests import TEST_ASSETS_ROOT`
Add reward test. 2024-02-06 15:05:44 +00:00			`from tests.conftest import ControlledAgent`


#2913: Updated tests 2024-11-06 14:52:22 +00:00			`def test_WebpageUnavailablePenalty(game_and_agent: tuple[PrimaiteGame, ControlledAgent]):`
Add reward test. 2024-02-06 15:05:44 +00:00			`"""Test that we get the right reward for failing to fetch a website."""`
#2736 - Fix up broken reward tests 2024-08-19 13:59:35 +01:00			`# set up the scenario, configure the web browser to the correct url`
Add reward test. 2024-02-06 15:05:44 +00:00			`game, agent = game_and_agent`
			`agent: ControlledAgent`
#2913: Remove from_config() and refactor (WIP). 2024-11-04 17:41:43 +00:00			`schema = WebpageUnavailablePenalty.ConfigSchema(node_hostname="client_1", sticky=True)`
			`comp = WebpageUnavailablePenalty(config=schema)`
#2913: Make rewards work with config file. 2024-11-06 11:35:06 +00:00
#2736 - Fix up broken reward tests 2024-08-19 13:59:35 +01:00			`client_1 = game.simulation.network.get_node_by_hostname("client_1")`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`browser: WebBrowser = client_1.software_manager.software.get("web-browser")`
#2736 - Fix up broken reward tests 2024-08-19 13:59:35 +01:00			`browser.run()`
#2888: self.X -> self.config.X 2025-01-13 15:38:11 +00:00			`browser.config.target_url = "http://www.example.com"`
Add reward test. 2024-02-06 15:05:44 +00:00			`agent.reward_function.register_component(comp, 0.7)`

#2736 - Fix up broken reward tests 2024-08-19 13:59:35 +01:00			`# Check that before trying to fetch the webpage, the reward is 0.0`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`agent.store_action(("do-nothing", {}))`
#2736 - Fix up broken reward tests 2024-08-19 13:59:35 +01:00			`game.step()`
Add reward test. 2024-02-06 15:05:44 +00:00			`assert agent.reward_function.current_reward == 0.0`

#2736 - Fix up broken reward tests 2024-08-19 13:59:35 +01:00			`# Check that successfully fetching the webpage yields a reward of 0.7`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`agent.store_action(("node-application-execute", {"node_name": "client_1", "application_name": "web-browser"}))`
Add reward test. 2024-02-06 15:05:44 +00:00			`game.step()`
			`assert agent.reward_function.current_reward == 0.7`

#2736 - Fix up broken reward tests 2024-08-19 13:59:35 +01:00			`# Block the web traffic, check that failing to fetch the webpage yields a reward of -0.7`
Add reward test. 2024-02-06 15:05:44 +00:00			`router: Router = game.simulation.network.get_node_by_hostname("router")`
Add port and protocol custom validators 2024-09-20 11:21:28 +01:00			`router.acl.add_rule(`
			`action=ACLAction.DENY,`
			`protocol=PROTOCOL_LOOKUP["TCP"],`
			`src_port=PORT_LOOKUP["HTTP"],`
			`dst_port=PORT_LOOKUP["HTTP"],`
			`)`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`agent.store_action(("node-application-execute", {"node_name": "client_1", "application_name": "web-browser"}))`
Add reward test. 2024-02-06 15:05:44 +00:00			`game.step()`
			`assert agent.reward_function.current_reward == -0.7`
Add test for new reward 2024-03-03 15:52:34 +00:00

#2913: Updated tests 2024-11-06 14:52:22 +00:00			`def test_uc2_rewards(game_and_agent: tuple[PrimaiteGame, ControlledAgent]):`
Apply suggestions from PR review. 2024-03-04 09:58:57 +00:00			`"""Test that the reward component correctly applies a penalty when the selected client cannot reach the database."""`
Add test for new reward 2024-03-03 15:52:34 +00:00			`game, agent = game_and_agent`
			`agent: ControlledAgent`

			`server_1: Server = game.simulation.network.get_node_by_hostname("server_1")`
			`server_1.software_manager.install(DatabaseService)`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`db_service = server_1.software_manager.software.get("database-service")`
Add test for new reward 2024-03-03 15:52:34 +00:00			`db_service.start()`

			`client_1 = game.simulation.network.get_node_by_hostname("client_1")`
			`client_1.software_manager.install(DatabaseClient)`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`db_client: DatabaseClient = client_1.software_manager.software.get("database-client")`
Add test for new reward 2024-03-03 15:52:34 +00:00			`db_client.configure(server_ip_address=server_1.network_interface[1].ip_address)`
			`db_client.run()`

			`router: Router = game.simulation.network.get_node_by_hostname("router")`
Fix airspace and remaining port problems from refactor 2024-09-19 15:06:29 +01:00			`router.acl.add_rule(`
Add port and protocol custom validators 2024-09-20 11:21:28 +01:00			`ACLAction.PERMIT, src_port=PORT_LOOKUP["POSTGRES_SERVER"], dst_port=PORT_LOOKUP["POSTGRES_SERVER"], position=2`
Fix airspace and remaining port problems from refactor 2024-09-19 15:06:29 +01:00			`)`
Add test for new reward 2024-03-03 15:52:34 +00:00
#2913: Remove from_config() and refactor (WIP). 2024-11-04 17:41:43 +00:00			`schema = GreenAdminDatabaseUnreachablePenalty.ConfigSchema(node_hostname="client_1", sticky=True)`
			`comp = GreenAdminDatabaseUnreachablePenalty(config=schema)`
Add test for new reward 2024-03-03 15:52:34 +00:00
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`request = ["network", "node", "client_1", "application", "database-client", "execute"]`
#2736 - Fix up broken reward tests 2024-08-19 13:59:35 +01:00			`response = game.simulation.apply_request(request)`
Add test for new reward 2024-03-03 15:52:34 +00:00			`state = game.get_sim_state()`
#2748: Port of PrimAITE Internal changes. 2024-08-19 12:55:45 +01:00			`ahi = AgentHistoryItem(`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`timestep=0, action="node-application-execute", parameters={}, request=request, response=response`
Minor fixes 2024-03-14 14:33:04 +00:00			`)`
#2748: Port of PrimAITE Internal changes. 2024-08-19 12:55:45 +01:00			`reward_value = comp.calculate(state, last_action_response=ahi)`
Add test for new reward 2024-03-03 15:52:34 +00:00			`assert reward_value == 1.0`
Merge remote-tracking branch 'origin/dev' into feature/2736-instantaneous-rewards 2024-08-20 10:40:48 +01:00			`assert ahi.reward_info == {"connection_attempt_status": "success"}`
Add test for new reward 2024-03-03 15:52:34 +00:00
			`router.acl.remove_rule(position=2)`

#2736 - Fix up broken reward tests 2024-08-19 13:59:35 +01:00			`response = game.simulation.apply_request(request)`
Add test for new reward 2024-03-03 15:52:34 +00:00			`state = game.get_sim_state()`
Merge remote-tracking branch 'origin/dev' into feature/2736-instantaneous-rewards 2024-08-20 10:40:48 +01:00			`ahi = AgentHistoryItem(`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`timestep=0, action="node-application-execute", parameters={}, request=request, response=response`
Merge remote-tracking branch 'origin/dev' into feature/2736-instantaneous-rewards 2024-08-20 10:40:48 +01:00			`)`
Minor fixes 2024-03-14 14:33:04 +00:00			`reward_value = comp.calculate(`
			`state,`
Merge remote-tracking branch 'origin/dev' into feature/2736-instantaneous-rewards 2024-08-20 10:40:48 +01:00			`last_action_response=ahi,`
Minor fixes 2024-03-14 14:33:04 +00:00			`)`
Add test for new reward 2024-03-03 15:52:34 +00:00			`assert reward_value == -1.0`
Merge remote-tracking branch 'origin/dev' into feature/2736-instantaneous-rewards 2024-08-20 10:40:48 +01:00			`assert ahi.reward_info == {"connection_attempt_status": "failure"}`
Add shared reward test 2024-03-13 14:01:17 +00:00

			`def test_shared_reward():`
			`CFG_PATH = TEST_ASSETS_ROOT / "configs/shared_rewards.yaml"`
			`with open(CFG_PATH, "r") as f:`
			`cfg = yaml.safe_load(f)`

#2476 Add test for episode scheduler 2024-04-25 15:09:46 +01:00			`env = PrimaiteGymEnv(env_config=cfg)`
Add shared reward test 2024-03-13 14:01:17 +00:00
			`env.reset()`

			`order = env.game._reward_calculation_order`
			`assert order.index("defender") > order.index("client_1_green_user")`
			`assert order.index("defender") > order.index("client_2_green_user")`

			`for step in range(256):`
			`act = env.action_space.sample()`
			`env.step(act)`
			`g1_reward = env.game.agents["client_1_green_user"].reward_function.current_reward`
			`g2_reward = env.game.agents["client_2_green_user"].reward_function.current_reward`
			`blue_reward = env.game.agents["defender"].reward_function.current_reward`
			`assert blue_reward == g1_reward + g2_reward`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00

			`def test_action_penalty_loads_from_config():`
			`"""Test to ensure that action penalty is correctly loaded from config into PrimaiteGymEnv"""`
			`CFG_PATH = TEST_ASSETS_ROOT / "configs/action_penalty.yaml"`
			`with open(CFG_PATH, "r") as f:`
			`cfg = yaml.safe_load(f)`

			`env = PrimaiteGymEnv(env_config=cfg)`

			`env.reset()`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`defender = env.game.agents["defender"]`
			`act_penalty_obj = None`
			`for comp in defender.reward_function.reward_components:`
			`if isinstance(comp[0], ActionPenalty):`
			`act_penalty_obj = comp[0]`
			`if act_penalty_obj is None:`
			`pytest.fail("Action penalty reward component was not added to the agent from config.")`
#2913: Updated tests 2024-11-06 14:52:22 +00:00			`assert act_penalty_obj.config.action_penalty == -0.75`
			`assert act_penalty_obj.config.do_nothing_penalty == 0.125`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00

			`def test_action_penalty():`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00			`"""Test that the action penalty is correctly applied when agent performs any action"""`

			`# Create an ActionPenalty Reward`
#2913: Remove from_config() and refactor (WIP). 2024-11-04 17:41:43 +00:00			`schema = ActionPenalty.ConfigSchema(action_penalty=-0.75, do_nothing_penalty=0.125)`
			`# Penalty = ActionPenalty(action_penalty=-0.75, do_nothing_penalty=0.125)`
#2913: Updated tests 2024-11-06 14:52:22 +00:00			`Penalty = ActionPenalty(config=schema)`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`# Assert that penalty is applied if action isn't do-nothing`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00			`reward_value = Penalty.calculate(`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`state={},`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00			`last_action_response=AgentHistoryItem(`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`timestep=0,`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`action="node-application-execute",`
			`parameters={"node_name": "client", "application_name": "web-browser"},`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`request=["execute"],`
			`response=RequestResponse.from_bool(True),`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00			`),`
			`)`

#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`assert reward_value == -0.75`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`# Assert that no penalty applied for a do-nothing action`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00			`reward_value = Penalty.calculate(`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`state={},`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00			`last_action_response=AgentHistoryItem(`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`timestep=0,`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`action="do-nothing",`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`parameters={},`
#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`request=["do-nothing"],`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`response=RequestResponse.from_bool(True),`
#2656 - Unit tests for new ActionPenalty reward component, testing yaml and some minor changes to the implementation. Need to update Documentation to detail how this is added 2024-06-26 12:20:28 +01:00			`),`
			`)`

#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`assert reward_value == 0.125`


#2913: Updated tests 2024-11-06 14:52:22 +00:00			`def test_action_penalty_e2e(game_and_agent: tuple[PrimaiteGame, ControlledAgent]):`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`"""Test that we get the right reward for doing actions to fetch a website."""`
			`game, agent = game_and_agent`
			`agent: ControlledAgent`
#2913: Updated tests 2024-11-06 14:52:22 +00:00			`schema = ActionPenalty.ConfigSchema(action_penalty=-0.75, do_nothing_penalty=0.125)`
			`comp = ActionPenalty(config=schema)`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00
			`agent.reward_function.register_component(comp, 1.0)`

#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`action = ("do-nothing", {})`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`agent.store_action(action)`
			`game.step()`
			`assert agent.reward_function.current_reward == 0.125`

#3062 - First pass at unifying naming convention for discriminators (still errors) [skip ci] 2025-02-03 16:24:03 +00:00			`action = ("node-file-scan", {"node_name": "client", "folder_name": "downloads", "file_name": "document.pdf"})`
#2656 - Make action penalty more configurable 2024-06-27 12:01:32 +01:00			`agent.store_action(action)`
			`game.step()`
			`assert agent.reward_function.current_reward == -0.75`