2023-09-26 12:54:56 +01:00
|
|
|
# TODO: remove this comment... This is just here to point out that I've named this 'actor' rather than 'agent'
|
|
|
|
|
# That's because I want to point out that this is distinct from 'agent' in the reinforcement learning sense of the word
|
|
|
|
|
# If you disagree, make a comment in the PR review and we can discuss
|
|
|
|
|
from abc import ABC, abstractmethod
|
2023-10-02 17:21:43 +01:00
|
|
|
from typing import Any, Dict, List, Optional, Union, TypeAlias
|
|
|
|
|
import numpy as np
|
2023-09-26 12:54:56 +01:00
|
|
|
|
2023-10-02 17:21:43 +01:00
|
|
|
from primaite.game.agent.actions import ActionManager
|
2023-09-26 12:54:56 +01:00
|
|
|
from primaite.game.agent.observations import ObservationSpace
|
|
|
|
|
from primaite.game.agent.rewards import RewardFunction
|
|
|
|
|
|
2023-10-02 17:21:43 +01:00
|
|
|
ObsType:TypeAlias = Union[Dict, np.ndarray]
|
2023-09-26 12:54:56 +01:00
|
|
|
|
|
|
|
|
class AbstractAgent(ABC):
    """Base class for scripted and RL agents."""

    def __init__(
        self,
        action_space: Optional[ActionManager],
        observation_space: Optional[ObservationSpace],
        reward_function: Optional[RewardFunction],
    ) -> None:
        """
        Initialise the agent's action, observation, and reward components.

        :param action_space: Manager used to translate this agent's actions into simulator requests.
        :param observation_space: Space used to derive this agent's observations from simulator state.
        :param reward_function: Function used to calculate this agent's reward from simulator state.
        """
        self.action_space: Optional[ActionManager] = action_space
        self.observation_space: Optional[ObservationSpace] = observation_space
        self.reward_function: Optional[RewardFunction] = reward_function
        # execution definition converts CAOS action to PrimAITE simulator request, sometimes having
        # to enrich the info by, for example, specifying target ip addresses, or converting a node
        # ID into a uuid
        self.execution_definition = None

    def get_obs_from_state(self, state: Dict) -> ObsType:
        """
        Convert a raw simulator state into this agent's observation.

        :param state: dict state directly from simulation.describe_state.
        :return: dict state according to CAOS.
        """
        return self.observation_space.observe(state)

    def get_reward_from_state(self, state: Dict) -> float:
        """
        Calculate this agent's reward from a raw simulator state.

        :param state: dict state directly from simulation.describe_state.
        :return: reward value calculated by this agent's reward function.
        """
        return self.reward_function.calculate(state)

    @abstractmethod
    def get_action(self, obs: ObsType, reward: Optional[float] = None):
        """
        Choose a CAOS-format action given the current observation (and, optionally, reward).

        In an RL agent, this method will send the CAOS observation to the GATE RL agent, then
        receive an int 1-40, then use a bespoke conversion to take the 1-40 int back into a
        CAOS action.

        :param obs: Current observation in CAOS format.
        :param reward: Most recent reward, if available.
        :return: Action in CAOS format.
        """
        return ("NODE", "SERVICE", "SCAN", "<fake-node-sid>", "<fake-service-sid>")

    @abstractmethod
    def format_request(self, action) -> List[str]:
        """
        Format action into format expected by the simulator, and apply execution definition if applicable.

        This will take something like APPLICATION.EXECUTE and add things like target_ip_address in
        the simulator; therefore the execution definition needs to be a mapping from CAOS into
        SIMULATOR.

        :param action: Action in CAOS format.
        :return: Request understood by the simulator, as a list of path components.
        """
        return ["network", "nodes", "<fake-node-uuid>", "file_system", "folder", "root", "scan"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2023-09-26 12:54:56 +01:00
|
|
|
|
|
|
|
|
class AbstractScriptedAgent(AbstractAgent):
    """Base class for actors that generate their own behaviour, rather than being driven externally."""

    pass
|
|
|
|
|
|
2023-10-02 17:21:43 +01:00
|
|
|
class RandomAgent(AbstractScriptedAgent):
    """Agent that ignores its observation and acts completely at random."""

    def get_action(self, obs: ObsType, reward: Optional[float] = None):
        """
        Sample a random action from this agent's action space.

        :param obs: Current observation (ignored).
        :param reward: Most recent reward (ignored).
        :return: Randomly sampled action from ``self.action_space.space``.
        """
        return self.action_space.space.sample()
|
|
|
|
|
|
2023-09-26 12:54:56 +01:00
|
|
|
|
|
|
|
|
class AbstractGATEAgent(AbstractAgent):
    """Base class for actors driven by external messages, e.g. RL policies communicating via GATE."""

    pass
|