2023-09-26 12:54:56 +01:00
|
|
|
# TODO: remove this comment... This is just here to point out that I've named this 'actor' rather than 'agent'
|
|
|
|
|
# That's because I want to point out that this is distinct from 'agent' in the reinforcement learning sense of the word
|
|
|
|
|
# If you disagree, make a comment in the PR review and we can discuss
|
|
|
|
|
from abc import ABC, abstractmethod
|
2023-10-06 20:32:52 +01:00
|
|
|
from typing import Any, Dict, List, Optional, Tuple, TypeAlias, Union
|
2023-10-06 10:36:29 +01:00
|
|
|
|
2023-10-02 17:21:43 +01:00
|
|
|
import numpy as np
|
2023-09-26 12:54:56 +01:00
|
|
|
|
2023-10-02 17:21:43 +01:00
|
|
|
from primaite.game.agent.actions import ActionManager
|
2023-09-26 12:54:56 +01:00
|
|
|
from primaite.game.agent.observations import ObservationSpace
|
|
|
|
|
from primaite.game.agent.rewards import RewardFunction
|
|
|
|
|
|
2023-10-06 10:36:29 +01:00
|
|
|
# An agent observation is either a dict (structured/nested observation) or a raw numpy array.
ObsType: TypeAlias = Union[Dict, np.ndarray]
|
|
|
|
|
|
2023-09-26 12:54:56 +01:00
|
|
|
|
|
|
|
|
class AbstractAgent(ABC):
    """
    Base class for scripted and RL agents.

    Holds the agent's observation space, action space and reward function, and
    provides the conversions between raw simulation state and the CAOS-level
    observations/actions that concrete agents work with.
    """

    def __init__(
        self,
        agent_name: Optional[str],
        action_space: Optional[ActionManager],
        observation_space: Optional[ObservationSpace],
        reward_function: Optional[RewardFunction],
    ) -> None:
        """
        Initialise the agent.

        :param agent_name: Human-readable name; falls back to "unnamed_agent" if None/empty.
        :param action_space: Manager that converts CAOS actions into simulator requests.
        :param observation_space: Converts raw simulation state into this agent's observation.
        :param reward_function: Calculates this agent's reward from simulation state.
        """
        self.agent_name: str = agent_name or "unnamed_agent"
        self.action_space: Optional[ActionManager] = action_space
        self.observation_space: Optional[ObservationSpace] = observation_space
        self.reward_function: Optional[RewardFunction] = reward_function

        # The execution definition converts a CAOS action to a PrimAITE simulator
        # request, sometimes having to enrich the info — for example specifying
        # target ip addresses, or converting a node ID into a uuid.
        self.execution_definition = None

    def convert_state_to_obs(self, state: Dict) -> ObsType:
        """
        Convert a raw simulation state into this agent's observation.

        :param state: dict state directly from simulation.describe_state
        :return: state formatted according to CAOS.
        """
        return self.observation_space.observe(state)

    def calculate_reward_from_state(self, state: Dict) -> float:
        """Calculate this agent's reward for the given simulation state."""
        return self.reward_function.calculate(state)

    @abstractmethod
    def get_action(self, obs: ObsType, reward: Optional[float] = None) -> Tuple[str, Dict]:
        """
        Choose an action given the current observation (and optionally the last reward).

        :param obs: The agent's current CAOS observation.
        :param reward: Reward from the previous step, if available.
        :return: Tuple of (action identifier, action options).
        """
        # In an RL agent, this method will send the CAOS observation to the GATE RL
        # agent, then receive an int 0-39, then use a bespoke conversion to take
        # that int back into a CAOS action.
        return ("DO_NOTHING", {})

    def format_request(self, action: Tuple[str, Dict], options: Dict[str, int]) -> List[str]:
        """Format action into format expected by the simulator, and apply execution definition if applicable."""
        # This will take something like APPLICATION.EXECUTE and add things like
        # target_ip_address in the simulator. Therefore, the execution definition
        # needs to be a mapping from CAOS into SIMULATOR.
        request = self.action_space.form_request(action_identifier=action, action_options=options)
        return request
|
2023-10-02 17:21:43 +01:00
|
|
|
|
2023-09-26 12:54:56 +01:00
|
|
|
|
|
|
|
|
class AbstractScriptedAgent(AbstractAgent):
    """Base class for agents that generate their own behaviour rather than being driven externally."""
|
|
|
|
|
|
2023-10-06 10:36:29 +01:00
|
|
|
|
2023-10-02 17:21:43 +01:00
|
|
|
class RandomAgent(AbstractScriptedAgent):
    """Agent that ignores its observation and acts completely at random."""

    def get_action(self, obs: ObsType, reward: Optional[float] = None) -> Tuple[str, Dict]:
        """
        Sample a uniformly random action from the action space.

        Both ``obs`` and ``reward`` are ignored; the action is drawn by sampling
        the underlying gym space and converting the sample back to a CAOS action.

        :param obs: Ignored.
        :param reward: Ignored.
        :return: Tuple of (action identifier, action options).
        """
        return self.action_space.get_action(self.action_space.space.sample())
|
2023-10-02 17:21:43 +01:00
|
|
|
|
2023-09-26 12:54:56 +01:00
|
|
|
|
|
|
|
|
class AbstractGATEAgent(AbstractAgent):
    """Base class for agents driven by external messages — for example RL policies via GATE."""
|