#2656 - Committing current state before lunch. New ActionPenalty reward added. Basic implementation returns a -1 reward if last_action_response.action isn't DONOTHING. Minor change in data_manipulation so I can see it working in the data_manipulation notebook. Need to use configured values but so far, promising?. Looks to result in a better average reward than without which is good, I think.

This commit is contained in:
Charlie Crane
2024-06-25 12:29:01 +01:00
parent 4a81dc3b2c
commit db27bea4ec
2 changed files with 33 additions and 1 deletions

View File

@@ -740,7 +740,6 @@ agents:
agent_name: client_2_green_user
agent_settings:
flatten_obs: true

View File

@@ -360,6 +360,38 @@ class SharedReward(AbstractReward):
return cls(agent_name=agent_name)
class ActionPenalty(AbstractReward):
"""
Apply a negative reward when taking any action except DONOTHING.
Optional Configuration item therefore default value of 0 (?).
"""
def __init__(self, agent_name: str, penalty: float = 0):
"""
Initialise the reward.
Penalty will default to 0, as this is an optional param.
"""
self.agent_name = agent_name
self.penalty = penalty
def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
"""Calculate the penalty to be applied."""
if last_action_response.action == "DONOTHING":
# No penalty for doing nothing at present
return 0
else:
return -1
@classmethod
def from_config(cls, config: Dict) -> "ActionPenalty":
"""Build the ActionPenalty object from config."""
agent_name = config.get("agent_name")
# penalty_value = config.get("ACTION_PENALTY", 0)
return cls(agent_name=agent_name)
class RewardFunction:
"""Manages the reward function for the agent."""
@@ -370,6 +402,7 @@ class RewardFunction:
"WEBPAGE_UNAVAILABLE_PENALTY": WebpageUnavailablePenalty,
"GREEN_ADMIN_DATABASE_UNREACHABLE_PENALTY": GreenAdminDatabaseUnreachablePenalty,
"SHARED_REWARD": SharedReward,
"ACTION_PENALTY": ActionPenalty,
}
"""List of reward class identifiers."""