#2656 - Corrected from_config() for ActionPenalty so that it can pull the negative reward value from YAML and apply, defaulting to 0 still if not found/not configured.

Currently prints to the terminal when a negative reward is being applied; this is only for implementation and troubleshooting, and will be removed before the PR is taken out of draft.
This commit is contained in:
Charlie Crane
2024-06-25 15:36:47 +01:00
parent db27bea4ec
commit 5ad16fdb7e
2 changed files with 12 additions and 4 deletions

View File

@@ -739,6 +739,11 @@ agents:
options:
agent_name: client_2_green_user
- type: ACTION_PENALTY
weight: 1.0
options:
agent_name: defender
penalty_value: -1
agent_settings:
flatten_obs: true

View File

@@ -367,7 +367,7 @@ class ActionPenalty(AbstractReward):
Optional Configuration item therefore default value of 0 (?).
"""
def __init__(self, agent_name: str, penalty: float = 0):
def __init__(self, agent_name: str, penalty: float):
"""
Initialise the reward.
@@ -382,14 +382,17 @@ class ActionPenalty(AbstractReward):
# No penalty for doing nothing at present
return 0
else:
return -1
_LOGGER.info(
f"Blue agent has incurred a penalty of {self.penalty}, for action: {last_action_response.action}"
)
return self.penalty
@classmethod
def from_config(cls, config: Dict) -> "ActionPenalty":
"""Build the ActionPenalty object from config."""
agent_name = config.get("agent_name")
# penalty_value = config.get("ACTION_PENALTY", 0)
return cls(agent_name=agent_name)
penalty_value = config.get("penalty_value", 0) # default to 0 so that no adverse effects.
return cls(agent_name=agent_name, penalty=penalty_value)
class RewardFunction: