From 5ad16fdb7eecfd3d0e9f8e6349a0d542066405a7 Mon Sep 17 00:00:00 2001 From: Charlie Crane Date: Tue, 25 Jun 2024 15:36:47 +0100 Subject: [PATCH] #2656 - Corrected from_config() for ActionPenalty so that it can pull the negative reward value from YAML and apply, defaulting to 0 still if not found/not configured. Currently prints to terminal when a negative reward is being applied, though this is for implementation and troubleshooting. To be removed before PR is pushed out of draft --- .../config/_package_data/data_manipulation.yaml | 5 +++++ src/primaite/game/agent/rewards.py | 11 +++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/primaite/config/_package_data/data_manipulation.yaml b/src/primaite/config/_package_data/data_manipulation.yaml index 1ec98f39..be613918 100644 --- a/src/primaite/config/_package_data/data_manipulation.yaml +++ b/src/primaite/config/_package_data/data_manipulation.yaml @@ -739,6 +739,11 @@ agents: options: agent_name: client_2_green_user + - type: ACTION_PENALTY + weight: 1.0 + options: + agent_name: defender + penalty_value: -1 agent_settings: flatten_obs: true diff --git a/src/primaite/game/agent/rewards.py b/src/primaite/game/agent/rewards.py index 7d14e097..d75597f0 100644 --- a/src/primaite/game/agent/rewards.py +++ b/src/primaite/game/agent/rewards.py @@ -367,7 +367,7 @@ class ActionPenalty(AbstractReward): Optional Configuration item therefore default value of 0 (?). """ - def __init__(self, agent_name: str, penalty: float = 0): + def __init__(self, agent_name: str, penalty: float): """ Initialise the reward. 
@@ -382,14 +382,17 @@ class ActionPenalty(AbstractReward): # No penalty for doing nothing at present return 0 else: - return -1 + _LOGGER.info( + f"Blue agent has incurred a penalty of {self.penalty}, for action: {last_action_response.action}" + ) + return self.penalty @classmethod def from_config(cls, config: Dict) -> "ActionPenalty": """Build the ActionPenalty object from config.""" agent_name = config.get("agent_name") - # penalty_value = config.get("ACTION_PENALTY", 0) - return cls(agent_name=agent_name) + penalty_value = config.get("penalty_value", 0) # default to 0 so that no adverse effects. + return cls(agent_name=agent_name, penalty=penalty_value) class RewardFunction: