Merge remote-tracking branch 'origin/dev' into feature/901-change-functionality-acl-rules

This commit is contained in:
SunilSamra
2023-07-13 16:48:02 +01:00
33 changed files with 677 additions and 260 deletions

View File

@@ -162,12 +162,11 @@ class AgentSessionABC(ABC):
metadata_dict = json.load(file)
metadata_dict["end_datetime"] = datetime.now().isoformat()
if not self.is_eval:
metadata_dict["learning"]["total_episodes"] = self._env.episode_count # noqa
metadata_dict["learning"]["total_episodes"] = self._env.actual_episode_count # noqa
metadata_dict["learning"]["total_time_steps"] = self._env.total_step_count # noqa
else:
metadata_dict["evaluation"]["total_episodes"] = self._env.episode_count # noqa
metadata_dict["evaluation"]["total_episodes"] = self._env.actual_episode_count # noqa
metadata_dict["evaluation"]["total_time_steps"] = self._env.total_step_count # noqa
filepath = self.session_path / "session_metadata.json"
@@ -218,10 +217,11 @@ class AgentSessionABC(ABC):
:param kwargs: Any agent-specific key-word args to be passed.
"""
self._env.set_as_eval() # noqa
self.is_eval = True
self._plot_av_reward_per_episode(learning_session=False)
_LOGGER.info("Finished evaluation")
if self._can_evaluate:
self._plot_av_reward_per_episode(learning_session=False)
self._update_session_metadata_file()
self.is_eval = True
_LOGGER.info("Finished evaluation")
@abstractmethod
def _get_latest_checkpoint(self):
@@ -375,8 +375,8 @@ class HardCodedAgentSessionABC(AgentSessionABC):
self._env.set_as_eval() # noqa
self.is_eval = True
time_steps = self._training_config.num_steps
episodes = self._training_config.num_episodes
time_steps = self._training_config.num_eval_steps
episodes = self._training_config.num_eval_episodes
obs = self._env.reset()
for episode in range(episodes):
@@ -395,6 +395,7 @@ class HardCodedAgentSessionABC(AgentSessionABC):
time.sleep(self._training_config.time_delay / 1000)
obs = self._env.reset()
self._env.close()
super().evaluate()
@classmethod
def load(cls):

View File

@@ -97,8 +97,12 @@ class RLlibAgent(AgentSessionABC):
metadata_dict = json.load(file)
metadata_dict["end_datetime"] = datetime.now().isoformat()
metadata_dict["total_episodes"] = self._current_result["episodes_total"]
metadata_dict["total_time_steps"] = self._current_result["timesteps_total"]
if not self.is_eval:
metadata_dict["learning"]["total_episodes"] = self._current_result["episodes_total"] # noqa
metadata_dict["learning"]["total_time_steps"] = self._current_result["timesteps_total"] # noqa
else:
metadata_dict["evaluation"]["total_episodes"] = self._current_result["episodes_total"] # noqa
metadata_dict["evaluation"]["total_time_steps"] = self._current_result["timesteps_total"] # noqa
filepath = self.session_path / "session_metadata.json"
_LOGGER.debug(f"Updating Session Metadata file: {filepath}")
@@ -122,13 +126,13 @@ class RLlibAgent(AgentSessionABC):
)
self._agent_config.seed = self._training_config.seed
self._agent_config.training(train_batch_size=self._training_config.num_steps)
self._agent_config.training(train_batch_size=self._training_config.num_train_steps)
self._agent_config.framework(framework="tf")
self._agent_config.rollouts(
num_rollout_workers=1,
num_envs_per_worker=1,
horizon=self._training_config.num_steps,
horizon=self._training_config.num_train_steps,
)
self._agent: Algorithm = self._agent_config.build(logger_creator=_custom_log_creator(self.learning_path))
@@ -150,8 +154,8 @@ class RLlibAgent(AgentSessionABC):
:param kwargs: Any agent-specific key-word args to be passed.
"""
time_steps = self._training_config.num_steps
episodes = self._training_config.num_episodes
time_steps = self._training_config.num_train_steps
episodes = self._training_config.num_train_episodes
_LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...")
for i in range(episodes):
@@ -162,9 +166,6 @@ class RLlibAgent(AgentSessionABC):
super().learn()
# save agent
self.save()
def evaluate(
self,
**kwargs,

View File

@@ -65,11 +65,12 @@ class SB3Agent(AgentSessionABC):
session_path=self.session_path,
timestamp_str=self.timestamp_str,
)
self._agent = self._agent_class(
PPOMlp,
self._env,
verbose=self.sb3_output_verbose_level,
n_steps=self._training_config.num_steps,
n_steps=self._training_config.num_train_steps,
tensorboard_log=str(self._tensorboard_log_path),
seed=self._training_config.seed,
)
@@ -97,14 +98,14 @@ class SB3Agent(AgentSessionABC):
:param kwargs: Any agent-specific key-word args to be passed.
"""
time_steps = self._training_config.num_steps
episodes = self._training_config.num_episodes
time_steps = self._training_config.num_train_steps
episodes = self._training_config.num_train_episodes
self.is_eval = False
_LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...")
for i in range(episodes):
self._agent.learn(total_timesteps=time_steps)
self._save_checkpoint()
self._env.reset()
self._env._write_av_reward_per_episode() # noqa
self.save()
self._env.close()
super().learn()
@@ -121,8 +122,8 @@ class SB3Agent(AgentSessionABC):
:param kwargs: Any agent-specific key-word args to be passed.
"""
time_steps = self._training_config.num_steps
episodes = self._training_config.num_episodes
time_steps = self._training_config.num_eval_steps
episodes = self._training_config.num_eval_episodes
self._env.set_as_eval()
self.is_eval = True
if self._training_config.deterministic:
@@ -140,7 +141,7 @@ class SB3Agent(AgentSessionABC):
if isinstance(action, np.ndarray):
action = np.int64(action)
obs, rewards, done, info = self._env.step(action)
self._env.reset()
self._env._write_av_reward_per_episode() # noqa
self._env.close()
super().evaluate()

View File

@@ -59,11 +59,19 @@ observation_space:
- name: NODE_LINK_TABLE
# - name: NODE_STATUSES
# - name: LINK_TRAFFIC_LEVELS
# Number of episodes to run per session
num_episodes: 10
# Number of time_steps per episode
num_steps: 256
# Number of episodes for training to run per session
num_train_episodes: 10
# Number of time_steps for training per episode
num_train_steps: 256
# Number of episodes for evaluation to run per session
num_eval_episodes: 1
# Number of time_steps for evaluation per episode
num_eval_steps: 256
# Sets how often the agent will save a checkpoint (every n episodes).
# Set to 0 if no checkpoints are required. Default is 10

View File

@@ -61,11 +61,17 @@ class TrainingConfig:
action_type: ActionType = ActionType.ANY
"The ActionType to use"
num_episodes: int = 10
"The number of episodes to train over"
num_train_episodes: int = 10
"The number of episodes to train over during a training session"
num_steps: int = 256
"The number of steps in an episode"
num_train_steps: int = 256
"The number of steps in an episode during a training session"
num_eval_episodes: int = 1
"The number of episodes to evaluate over during an evaluation session"
num_eval_steps: int = 256
"The number of steps in an episode during an evaluation session"
checkpoint_every_n_episodes: int = 5
"The agent will save a checkpoint every n episodes"
@@ -249,8 +255,17 @@ class TrainingConfig:
tc += f"{self.hard_coded_agent_view}, "
tc += f"{self.action_type}, "
tc += f"observation_space={self.observation_space}, "
tc += f"{self.num_episodes} episodes @ "
tc += f"{self.num_steps} steps"
# Summarise the episode/step budget that applies to the configured session
# type. Train-only and eval-only sessions report a single budget; any other
# session type reports both the training and the evaluation budget.
if self.session_type is SessionType.TRAIN:
    tc += f"{self.num_train_episodes} episodes @ "
    tc += f"{self.num_train_steps} steps"
elif self.session_type is SessionType.EVAL:
    tc += f"{self.num_eval_episodes} episodes @ "
    tc += f"{self.num_eval_steps} steps"
else:
    # The "Training:" figures must come from the num_train_* fields (they
    # previously echoed the num_eval_* values), and the two summaries need
    # a separator so they do not run together as "...stepsEvaluation: ...".
    tc += f"Training: {self.num_train_episodes} episodes @ "
    tc += f"{self.num_train_steps} steps, "
    tc += f"Evaluation: {self.num_eval_episodes} episodes @ "
    tc += f"{self.num_eval_steps} steps"
return tc
@@ -298,24 +313,27 @@ def convert_legacy_training_config_dict(
agent_framework: AgentFramework = AgentFramework.SB3,
agent_identifier: AgentIdentifier = AgentIdentifier.PPO,
action_type: ActionType = ActionType.ANY,
num_steps: int = 256,
num_train_steps: int = 256,
) -> Dict[str, Any]:
"""
Convert a legacy training config dict to the new format.
:param legacy_config_dict: A legacy training config dict.
:param agent_framework: The agent framework to use as legacy training configs don't have agent_framework values.
:param agent_identifier: The red agent identifier to use as legacy training configs don't have agent_identifier
values.
:param action_type: The action space type to set as legacy training configs don't have action_type values.
:param num_steps: The number of steps to set as legacy training configs don't have num_steps values.
:param agent_framework: The agent framework to use as legacy training
configs don't have agent_framework values.
:param agent_identifier: The red agent identifier to use as legacy
training configs don't have agent_identifier values.
:param action_type: The action space type to set as legacy training configs
don't have action_type values.
:param num_train_steps: The number of steps to set as legacy training configs
don't have num_train_steps values.
:return: The converted training config dict.
"""
config_dict = {
"agent_framework": agent_framework.name,
"agent_identifier": agent_identifier.name,
"action_type": action_type.name,
"num_steps": num_steps,
"num_train_steps": num_train_steps,
"sb3_output_verbose_level": SB3OutputVerboseLevel.INFO.name,
}
session_type_map = {"TRAINING": "TRAIN", "EVALUATION": "EVAL"}
@@ -336,7 +354,8 @@ def _get_new_key_from_legacy(legacy_key: str) -> str:
"""
key_mapping = {
"agentIdentifier": None,
"numEpisodes": "num_episodes",
"numEpisodes": "num_train_episodes",
"numSteps": "num_train_steps",
"timeDelay": "time_delay",
"configFilename": None,
"sessionType": "session_type",

View File

@@ -84,7 +84,12 @@ class Primaite(Env):
_LOGGER.info(f"Using: {str(self.training_config)}")
# Number of steps in an episode
self.episode_steps = self.training_config.num_steps
if self.training_config.session_type == SessionType.TRAIN:
self.episode_steps = self.training_config.num_train_steps
elif self.training_config.session_type == SessionType.EVAL:
self.episode_steps = self.training_config.num_eval_steps
else:
self.episode_steps = self.training_config.num_train_steps
super(Primaite, self).__init__()
@@ -259,6 +264,12 @@ class Primaite(Env):
self.episode_count = 0
self.step_count = 0
self.total_step_count = 0
self.episode_steps = self.training_config.num_eval_steps
def _write_av_reward_per_episode(self):
if self.actual_episode_count > 0:
csv_data = self.actual_episode_count, self.average_reward
self.episode_av_reward_writer.write(csv_data)
def reset(self):
"""
@@ -267,10 +278,7 @@ class Primaite(Env):
Returns:
Environment observation space (reset)
"""
if self.actual_episode_count > 0:
csv_data = self.actual_episode_count, self.average_reward
self.episode_av_reward_writer.write(csv_data)
self._write_av_reward_per_episode()
self.episode_count += 1
# Don't need to reset links, as they are cleared and recalculated every

View File

@@ -90,7 +90,6 @@ def calculate_reward_function(
f"Penalty of {ier_reward} was NOT applied."
)
)
return reward_value

View File

@@ -15,5 +15,6 @@ def av_rewards_dict(av_rewards_csv_file: Union[str, Path]) -> Dict[int, float]:
:param av_rewards_csv_file: The average rewards per episode csv file path.
:return: The average rewards per episode csv as a dict.
"""
d = pl.read_csv(av_rewards_csv_file).to_dict()
return {v: d["Average Reward"][i] for i, v in enumerate(d["Episode"])}
df = pl.read_csv(av_rewards_csv_file).to_dict()
return {v: df["Average Reward"][i] for i, v in enumerate(df["Episode"])}