"""Agents with predefined behaviours.""" from typing import Dict, Optional, Tuple import numpy as np import pydantic from gymnasium.core import ObsType from primaite.game.agent.actions import ActionManager from primaite.game.agent.interface import AbstractScriptedAgent from primaite.game.agent.observations import ObservationManager from primaite.game.agent.rewards import RewardFunction class ProbabilisticAgent(AbstractScriptedAgent): """Scripted agent which randomly samples its action space with prescribed probabilities for each action.""" class Settings(pydantic.BaseModel): """Config schema for Probabilistic agent settings.""" model_config = pydantic.ConfigDict(extra="forbid") """Strict validation.""" action_probabilities: Dict[int, float] """Probability to perform each action in the action map. The sum of probabilities should sum to 1.""" random_seed: Optional[int] = None """Random seed. If set, each episode the agent will choose the same random sequence of actions.""" # TODO: give the option to still set a random seed, but have it vary each episode in a predictable way # for example if the user sets seed 123, have it be 123 + episode_num, so that each ep it's the next seed. @pydantic.field_validator("action_probabilities", mode="after") @classmethod def probabilities_sum_to_one(cls, v: Dict[int, float]) -> Dict[int, float]: """Make sure the probabilities sum to 1.""" if not abs(sum(v.values()) - 1) < 1e-6: raise ValueError("Green action probabilities must sum to 1") return v @pydantic.field_validator("action_probabilities", mode="after") @classmethod def action_map_covered_correctly(cls, v: Dict[int, float]) -> Dict[int, float]: """Ensure that the keys of the probability dictionary cover all integers from 0 to N.""" if not all((i in v) for i in range(len(v))): raise ValueError( "Green action probabilities must be defined as a mapping where the keys are consecutive integers " "from 0 to N." ) return v def __init__( self, agent_name: str, action_space: Optional[ActionManager], observation_space: Optional[ObservationManager], reward_function: Optional[RewardFunction], settings: Dict = {}, ) -> None: # If the action probabilities are not specified, create equal probabilities for all actions if "action_probabilities" not in settings: num_actions = len(action_space.action_map) settings = {"action_probabilities": {i: 1 / num_actions for i in range(num_actions)}} # If seed not specified, set it to None so that numpy chooses a random one. settings.setdefault("random_seed") self.settings = ProbabilisticAgent.Settings(**settings) self.rng = np.random.default_rng(self.settings.random_seed) # convert probabilities from self.probabilities = np.asarray(list(self.settings.action_probabilities.values())) super().__init__(agent_name, action_space, observation_space, reward_function) def get_action(self, obs: ObsType, reward: float = 0.0, timestep: Optional[int] = None) -> Tuple[str, Dict]: """ Choose a random action from the action space. The probability of each action is given by the corresponding index in ``self.probabilities``. :param obs: Current observation of the simulation :type obs: ObsType :param reward: Reward for the last step, not used for scripted agents, defaults to 0 :type reward: float, optional :return: Action to be taken in CAOS format. :rtype: Tuple[str, Dict] """ choice = self.rng.choice(len(self.action_manager.action_map), p=self.probabilities) return self.action_manager.get_action(choice)