#2656 - Corrected from_config() for ActionPenalty so that it reads the negative reward value from the YAML config and applies it, still defaulting to 0 if the value is not found/not configured.

Currently prints to the terminal when a negative reward is being applied; this is only for implementation and troubleshooting, and is to be removed before the PR is pushed out of draft.
2024-06-25 15:36:47 +01:00
parent db27bea4ec
commit 5ad16fdb7e
2 changed files with 12 additions and 4 deletions
--- a/src/primaite/config/_package_data/data_manipulation.yaml
+++ b/src/primaite/config/_package_data/data_manipulation.yaml
@@ -739,6 +739,11 @@ agents:
          options:
            agent_name: client_2_green_user

+        - type: ACTION_PENALTY
+          weight: 1.0
+          options:
+            agent_name: defender
+            penalty_value: -1

    agent_settings:
      flatten_obs: true
--- a/src/primaite/game/agent/rewards.py
+++ b/src/primaite/game/agent/rewards.py
@@ -367,7 +367,7 @@ class ActionPenalty(AbstractReward):
    Optional Configuration item therefore default value of 0 (?).
    """

-    def __init__(self, agent_name: str, penalty: float = 0):
+    def __init__(self, agent_name: str, penalty: float):
        """
        Initialise the reward.

@@ -382,14 +382,17 @@ class ActionPenalty(AbstractReward):
            # No penalty for doing nothing at present
            return 0
        else:
-            return -1
+            _LOGGER.info(
+                f"Blue agent has incurred a penalty of {self.penalty}, for action: {last_action_response.action}"
+            )
+            return self.penalty

    @classmethod
    def from_config(cls, config: Dict) -> "ActionPenalty":
        """Build the ActionPenalty object from config."""
        agent_name = config.get("agent_name")
-        # penalty_value = config.get("ACTION_PENALTY", 0)
-        return cls(agent_name=agent_name)
+        penalty_value = config.get("penalty_value", 0)  # default to 0 so that no adverse effects.
+        return cls(agent_name=agent_name, penalty=penalty_value)


 class RewardFunction: