From 02d29f7fb9bb93ebe32c13503ee3e56dfe545369 Mon Sep 17 00:00:00 2001
From: Nick Todd <nick.todd@t-s.qinetiq.com>
Date: Thu, 7 Nov 2024 16:35:39 +0000
Subject: [PATCH] #2913: Updates to How-To guide

---
 .../how_to_guides/extensible_rewards.rst      | 60 +++++++------------
 1 file changed, 21 insertions(+), 39 deletions(-)

diff --git a/docs/source/how_to_guides/extensible_rewards.rst b/docs/source/how_to_guides/extensible_rewards.rst
index 2551eee0..4dd24110 100644
--- a/docs/source/how_to_guides/extensible_rewards.rst
+++ b/docs/source/how_to_guides/extensible_rewards.rst
@@ -6,65 +6,47 @@
 
 Extensible Rewards
 ******************
+Extensible Rewards differ from the previous reward mechanism used in PrimAITE v3.x as new reward
+types can be added without requiring a change to the RewardFunction class in rewards.py (PrimAITE
+core repository).
 
 Changes to reward class structure.
 ==================================
 
 Reward classes are inherited from AbstractReward (a sub-class of Pydantic's BaseModel).
-Within the reward class there is a ConfigSchema class responsible for ensuring config file data is
-in the correct format. The `.from_config()` method is generally unchanged but should initialise the
-attributes edfined in the ConfigSchema.
+Within the reward class there is a ConfigSchema class responsible for ensuring the config file data
+is in the correct format. This also means there is little (if any) requirement for an `__init__`
+method. The `.from_config` method is no longer required as it's inherited from `AbstractReward`.
 Each class requires an identifier string which is used by the ConfigSchema class to verify that it
 hasn't previously been added to the registry.
 
 Inheriting from `BaseModel` removes the need for an `__init__` method but means that object
 attributes need to be passed by keyword.
 
-.. code:: Python
+To add a new reward class follow the example below. Note that the type attribute in the
+`ConfigSchema` class should match the type used in the config file to define the reward.
 
-class AbstractReward(BaseModel):
-    """Base class for reward function components."""
+.. code-block:: Python
 
-    class ConfigSchema(BaseModel, ABC):
-        """Config schema for AbstractReward."""
+class DatabaseFileIntegrity(AbstractReward, identifier="DATABASE_FILE_INTEGRITY"):
+    """Reward function component which rewards the agent for maintaining the integrity of a database file."""
 
-        type: str
+    config: "DatabaseFileIntegrity.ConfigSchema"
+    location_in_state: List[str] = [""]
+    reward: float = 0.0
 
-    _registry: ClassVar[Dict[str, Type["AbstractReward"]]] = {}
+    class ConfigSchema(AbstractReward.ConfigSchema):
+        """ConfigSchema for DatabaseFileIntegrity."""
 
-    def __init_subclass__(cls, identifier: str, **kwargs: Any) -> None:
-        super().__init_subclass__(**kwargs)
-        if identifier in cls._registry:
-            raise ValueError(f"Duplicate node adder {identifier}")
-        cls._registry[identifier] = cls
+        type: str = "DATABASE_FILE_INTEGRITY"
+        node_hostname: str
+        folder_name: str
+        file_name: str
 
-    @classmethod
-    def from_config(cls, config: Dict) -> "AbstractReward":
-        """Create a reward function component from a config dictionary.
-
-        :param config: dict of options for the reward component's constructor
-        :type config: dict
-        :return: The reward component.
-        :rtype: AbstractReward
-        """
-        if config["type"] not in cls._registry:
-            raise ValueError(f"Invalid reward type {config['type']}")
-        adder_class = cls._registry[config["type"]]
-        adder_class.add_nodes_to_net(config=adder_class.ConfigSchema(**config))
-        return cls
-
-    @abstractmethod
     def calculate(self, state: Dict, last_action_response: "AgentHistoryItem") -> float:
         """Calculate the reward for the current state.
+        """
 
-        :param state: Current simulation state
-        :type state: Dict
-        :param last_action_response: Current agent history state
-        :type last_action_response: AgentHistoryItem state
-        :return: Reward value
-        :rtype: float
-        """
-        return 0.0
 
 
 Changes to YAML file.