diff --git a/src/primaite/config/_package_data/data_manipulation.yaml b/src/primaite/config/_package_data/data_manipulation.yaml
index 1ec98f39..be613918 100644
--- a/src/primaite/config/_package_data/data_manipulation.yaml
+++ b/src/primaite/config/_package_data/data_manipulation.yaml
@@ -739,6 +739,11 @@ agents:
         options:
           agent_name: client_2_green_user
+      - type: ACTION_PENALTY
+        weight: 1.0
+        options:
+          agent_name: defender
+          penalty_value: -1
     agent_settings:
       flatten_obs: true
diff --git a/src/primaite/game/agent/rewards.py b/src/primaite/game/agent/rewards.py
index 7d14e097..d75597f0 100644
--- a/src/primaite/game/agent/rewards.py
+++ b/src/primaite/game/agent/rewards.py
@@ -367,7 +367,7 @@ class ActionPenalty(AbstractReward):
     Optional Configuration item therefore default value of 0 (?).
     """
 
-    def __init__(self, agent_name: str, penalty: float = 0):
+    def __init__(self, agent_name: str, penalty: float):
         """
         Initialise the reward.
 
@@ -382,14 +382,17 @@ class ActionPenalty(AbstractReward):
             # No penalty for doing nothing at present
             return 0
         else:
-            return -1
+            _LOGGER.info(
+                f"Blue agent has incurred a penalty of {self.penalty}, for action: {last_action_response.action}"
+            )
+            return self.penalty
 
     @classmethod
     def from_config(cls, config: Dict) -> "ActionPenalty":
         """Build the ActionPenalty object from config."""
         agent_name = config.get("agent_name")
-        # penalty_value = config.get("ACTION_PENALTY", 0)
-        return cls(agent_name=agent_name)
+        penalty_value = config.get("penalty_value", 0)  # Default to 0 so that omitting it has no adverse effect.
+        return cls(agent_name=agent_name, penalty=penalty_value)
 
 
 class RewardFunction: