#2656 - Committing current state before lunch. New ActionPenalty reward added. The basic implementation returns a reward of -1 if last_action_response.action isn't DONOTHING. Minor change in data_manipulation so I can see it working in the data_manipulation notebook. It still needs to use configured values, but so far it looks promising: it appears to result in a better average reward than without, which is good, I think.

2024-06-25 12:29:01 +01:00
parent 4a81dc3b2c
commit db27bea4ec
2 changed files with 33 additions and 1 deletions
--- a/src/primaite/config/_package_data/data_manipulation.yaml
+++ b/src/primaite/config/_package_data/data_manipulation.yaml
@@ -740,7 +740,6 @@ agents:
            agent_name: client_2_green_user


-
    agent_settings:
      flatten_obs: true

--- a/src/primaite/game/agent/rewards.py
+++ b/src/primaite/game/agent/rewards.py
@@ -360,6 +360,38 @@ class SharedReward(AbstractReward):
        return cls(agent_name=agent_name)


+class ActionPenalty(AbstractReward):
+    """
+    Apply a fixed reward of -1 for any action other than DONOTHING.
+
+    NOTE: ``penalty`` is stored but not yet used; ``calculate`` hard-codes -1.
+    """
+
+    def __init__(self, agent_name: str, penalty: float = 0):
+        """
+        Store the agent name and penalty value on the instance.
+
+        :param penalty: Optional, defaults to 0. Not read by ``calculate`` yet.
+        """
+        self.agent_name = agent_name
+        self.penalty = penalty
+
+    def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
+        """Return 0 if the last action was DONOTHING, otherwise -1 (``state`` is unused)."""
+        if last_action_response.action == "DONOTHING":
+            # No penalty for doing nothing at present
+            return 0
+        else:
+            return -1  # TODO: hard-coded; self.penalty is not applied yet
+
+    @classmethod
+    def from_config(cls, config: Dict) -> "ActionPenalty":
+        """Build the ActionPenalty from config; only ``agent_name`` is read."""
+        agent_name = config.get("agent_name")
+        # TODO: read the penalty from config, e.g. config.get("ACTION_PENALTY", 0)
+        return cls(agent_name=agent_name)
+
+
 class RewardFunction:
    """Manages the reward function for the agent."""

@@ -370,6 +402,7 @@ class RewardFunction:
        "WEBPAGE_UNAVAILABLE_PENALTY": WebpageUnavailablePenalty,
        "GREEN_ADMIN_DATABASE_UNREACHABLE_PENALTY": GreenAdminDatabaseUnreachablePenalty,
        "SHARED_REWARD": SharedReward,
+        "ACTION_PENALTY": ActionPenalty,
    }
    """List of reward class identifiers."""