From 5ad16fdb7eecfd3d0e9f8e6349a0d542066405a7 Mon Sep 17 00:00:00 2001 From: Charlie Crane Date: Tue, 25 Jun 2024 15:36:47 +0100 Subject: [PATCH] #2656 - Corrected from_config() for ActionPenalty so that it can pull the negative reward value from YAML and apply, defaulting to 0 still if not found/not configured. Currently prints to terminal when a negative reward is being applied, though this is for implementation and troubleshooting. To be removed before PR is pushed out of draft --- .../config/_package_data/data_manipulation.yaml | 5 +++++ src/primaite/game/agent/rewards.py | 11 +++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/primaite/config/_package_data/data_manipulation.yaml b/src/primaite/config/_package_data/data_manipulation.yaml index 1ec98f39..be613918 100644 --- a/src/primaite/config/_package_data/data_manipulation.yaml +++ b/src/primaite/config/_package_data/data_manipulation.yaml @@ -739,6 +739,11 @@ agents: options: agent_name: client_2_green_user + - type: ACTION_PENALTY + weight: 1.0 + options: + agent_name: defender + penalty_value: -1 agent_settings: flatten_obs: true diff --git a/src/primaite/game/agent/rewards.py b/src/primaite/game/agent/rewards.py index 7d14e097..d75597f0 100644 --- a/src/primaite/game/agent/rewards.py +++ b/src/primaite/game/agent/rewards.py @@ -367,7 +367,7 @@ class ActionPenalty(AbstractReward): Optional Configuration item therefore default value of 0 (?). """ - def __init__(self, agent_name: str, penalty: float = 0): + def __init__(self, agent_name: str, penalty: float): """ Initialise the reward. 
@@ -382,14 +382,17 @@ class ActionPenalty(AbstractReward): # No penalty for doing nothing at present return 0 else: - return -1 + _LOGGER.info( + f"Blue agent has incurred a penalty of {self.penalty}, for action: {last_action_response.action}" + ) + return self.penalty @classmethod def from_config(cls, config: Dict) -> "ActionPenalty": """Build the ActionPenalty object from config.""" agent_name = config.get("agent_name") - # penalty_value = config.get("ACTION_PENALTY", 0) - return cls(agent_name=agent_name) + penalty_value = config.get("penalty_value", 0) # default to 0 so that no adverse effects. + return cls(agent_name=agent_name, penalty=penalty_value) class RewardFunction: