"""Agents with predefined behaviours."""

from typing import Dict, Optional, Tuple

import numpy as np
import pydantic
from gymnasium.core import ObsType

from primaite.game.agent.actions import ActionManager
from primaite.game.agent.interface import AbstractScriptedAgent
from primaite.game.agent.observations import ObservationManager
from primaite.game.agent.rewards import RewardFunction


class ProbabilisticAgent(AbstractScriptedAgent):
    """Scripted agent which randomly samples its action space with prescribed probabilities for each action."""

    class Settings(pydantic.BaseModel):
        """Config schema for ``ProbabilisticAgent`` settings."""

        model_config = pydantic.ConfigDict(extra="forbid")
        """Strict validation: unrecognised settings keys raise a validation error."""

        action_probabilities: Dict[int, float]
        """Probability of performing each action in the action map. The probabilities must sum to 1."""

        random_seed: Optional[int] = None
        """Random seed. If set, the agent will choose the same random sequence of actions each episode."""

        # TODO: give the option to still set a random seed, but have it vary each episode in a predictable
        # way. For example, if the user sets seed 123, use 123 + episode_num so that each episode gets the
        # next seed.

        @pydantic.field_validator("action_probabilities", mode="after")
        @classmethod
        def probabilities_sum_to_one(cls, v: Dict[int, float]) -> Dict[int, float]:
            """Make sure the probabilities sum to 1."""
            if not abs(sum(v.values()) - 1) < 1e-6:
                raise ValueError("Green action probabilities must sum to 1")
            return v

        @pydantic.field_validator("action_probabilities", mode="after")
        @classmethod
        def action_map_covered_correctly(cls, v: Dict[int, float]) -> Dict[int, float]:
            """Ensure that the keys of the probability dictionary cover all integers from 0 to N."""
            if not all(i in v for i in range(len(v))):
                raise ValueError(
                    "Green action probabilities must be defined as a mapping where the keys are consecutive "
                    "integers from 0 to N."
                )
            return v
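
    # Illustrative examples of the two validators above (hypothetical values, shown as a
    # sketch rather than shipped config):
    #
    #     ProbabilisticAgent.Settings(action_probabilities={0: 0.5, 1: 0.3, 2: 0.2})  # valid
    #     ProbabilisticAgent.Settings(action_probabilities={0: 0.5, 1: 0.6})  # fails: sums to 1.1
    #     ProbabilisticAgent.Settings(action_probabilities={0: 0.5, 2: 0.5})  # fails: key 1 missing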

    def __init__(
        self,
        agent_name: str,
        action_space: Optional[ActionManager],
        observation_space: Optional[ObservationManager],
        reward_function: Optional[RewardFunction],
        settings: Optional[Dict] = None,
    ) -> None:
        # Work on a copy so the caller's settings dict is never mutated.
        settings = dict(settings) if settings else {}

        # If the action probabilities are not specified, give every action equal probability.
        if "action_probabilities" not in settings:
            num_actions = len(action_space.action_map)
            settings["action_probabilities"] = {i: 1 / num_actions for i in range(num_actions)}

        # If the seed is not specified, set it to None so that numpy chooses a random one.
        settings.setdefault("random_seed")

        self.settings = ProbabilisticAgent.Settings(**settings)

        self.rng = np.random.default_rng(self.settings.random_seed)

        # Convert the probabilities from a dict into a numpy array so they can be passed to rng.choice.
        self.probabilities = np.asarray(list(self.settings.action_probabilities.values()))

        super().__init__(agent_name, action_space, observation_space, reward_function)
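
    # Worked example of the default behaviour: with a 4-action map and no explicit
    # ``action_probabilities``, each action is assigned probability 1 / 4 = 0.25.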

    def get_action(self, obs: ObsType, reward: float = 0.0, timestep: Optional[int] = None) -> Tuple[str, Dict]:
        """
        Choose a random action from the action space.

        The probability of each action is given by the corresponding index in ``self.probabilities``.

        :param obs: Current observation of the simulation.
        :type obs: ObsType
        :param reward: Reward for the last step, not used for scripted agents, defaults to 0.
        :type reward: float, optional
        :param timestep: Current timestep of the simulation, not used by this agent.
        :type timestep: Optional[int]
        :return: Action to be taken in CAOS format.
        :rtype: Tuple[str, Dict]
        """
        choice = self.rng.choice(len(self.action_manager.action_map), p=self.probabilities)
        return self.action_manager.get_action(choice)
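

# Illustrative usage (a sketch: ``action_manager``, ``observation_manager``, ``reward_function``
# and ``obs`` are assumed to be built by the surrounding game setup, and the settings values
# below are hypothetical):
#
#     agent = ProbabilisticAgent(
#         agent_name="green_user",
#         action_space=action_manager,
#         observation_space=observation_manager,
#         reward_function=reward_function,
#         settings={"action_probabilities": {0: 0.7, 1: 0.2, 2: 0.1}, "random_seed": 123},
#     )
#     action, options = agent.get_action(obs)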
|