diff --git a/src/primaite/game/agent/interface.py b/src/primaite/game/agent/interface.py
index 0531b25f..91fa03d4 100644
--- a/src/primaite/game/agent/interface.py
+++ b/src/primaite/game/agent/interface.py
@@ -141,8 +141,6 @@ class AbstractAgent(ABC):
 
         :param obs: Observation of the environment.
         :type obs: ObsType
-        :param reward: Reward from the previous action, defaults to None TODO: should this parameter even be accepted?
-        :type reward: float, optional
         :param timestep: The current timestep in the simulation, used for non-RL agents. Optional
         :type timestep: int
         :return: Action to be taken in the environment.
diff --git a/src/primaite/game/agent/rewards.py b/src/primaite/game/agent/rewards.py
index a2ffd875..d8cb1328 100644
--- a/src/primaite/game/agent/rewards.py
+++ b/src/primaite/game/agent/rewards.py
@@ -43,9 +43,7 @@ class AbstractReward:
     """Base class for reward function components."""
 
     @abstractmethod
-    def calculate(
-        self, state: Dict, last_action_response: "AgentActionHistoryItem"
-    ) -> float:  # todo maybe make last_action_response optional?
+    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
         """Calculate the reward for the current state."""
         return 0.0
 
@@ -65,9 +63,7 @@ class AbstractReward:
 class DummyReward(AbstractReward):
     """Dummy reward function component which always returns 0."""
 
-    def calculate(
-        self, state: Dict, last_action_response: "AgentActionHistoryItem"
-    ) -> float:  # todo maybe make last_action_response optional?
+    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
         """Calculate the reward for the current state."""
         return 0.0
 
@@ -107,9 +103,7 @@ class DatabaseFileIntegrity(AbstractReward):
             file_name,
         ]
 
-    def calculate(
-        self, state: Dict, last_action_response: "AgentActionHistoryItem"
-    ) -> float:  # todo maybe make last_action_response optional?
+    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
         """Calculate the reward for the current state.
 
         :param state: The current state of the simulation.
@@ -164,9 +158,7 @@ class WebServer404Penalty(AbstractReward):
         """
         self.location_in_state = ["network", "nodes", node_hostname, "services", service_name]
 
-    def calculate(
-        self, state: Dict, last_action_response: "AgentActionHistoryItem"
-    ) -> float:  # todo maybe make last_action_response optional?
+    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
         """Calculate the reward for the current state.
 
         :param state: The current state of the simulation.
@@ -220,9 +212,7 @@ class WebpageUnavailablePenalty(AbstractReward):
         self.location_in_state: List[str] = ["network", "nodes", node_hostname, "applications", "WebBrowser"]
         self._last_request_failed: bool = False
 
-    def calculate(
-        self, state: Dict, last_action_response: "AgentActionHistoryItem"
-    ) -> float:  # todo maybe make last_action_response optional?
+    def calculate(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
         """
         Calculate the reward based on current simulation state, and the recent agent action.
 
@@ -397,9 +387,7 @@ class RewardFunction:
         """
         self.reward_components.append((component, weight))
 
-    def update(
-        self, state: Dict, last_action_response: "AgentActionHistoryItem"
-    ) -> float:  # todo maybe make last_action_response optional?
+    def update(self, state: Dict, last_action_response: "AgentActionHistoryItem") -> float:
         """Calculate the overall reward for the current state.
 
         :param state: The current state of the simulation.