Merge remote-tracking branch 'origin/dev' into feature/901-change-functionality-acl-rules
@@ -162,12 +162,11 @@ class AgentSessionABC(ABC):
 metadata_dict = json.load(file)

 metadata_dict["end_datetime"] = datetime.now().isoformat()

 if not self.is_eval:
-    metadata_dict["learning"]["total_episodes"] = self._env.episode_count # noqa
+    metadata_dict["learning"]["total_episodes"] = self._env.actual_episode_count # noqa
     metadata_dict["learning"]["total_time_steps"] = self._env.total_step_count # noqa
 else:
-    metadata_dict["evaluation"]["total_episodes"] = self._env.episode_count # noqa
+    metadata_dict["evaluation"]["total_episodes"] = self._env.actual_episode_count # noqa
     metadata_dict["evaluation"]["total_time_steps"] = self._env.total_step_count # noqa

 filepath = self.session_path / "session_metadata.json"
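Both this method and the RLlibAgent override further down follow the same read-modify-write pattern against session_metadata.json. A minimal standalone sketch of that pattern, assuming a file whose top level already contains learning and evaluation sections (the helper name and signature are illustrative, not PriMAITE's API):

    import json
    from datetime import datetime
    from pathlib import Path

    def update_session_metadata(session_path: Path, is_eval: bool, episodes: int, steps: int) -> None:
        # Hypothetical helper illustrating the read-modify-write pattern above.
        filepath = session_path / "session_metadata.json"
        with open(filepath, "r") as file:
            metadata_dict = json.load(file)

        metadata_dict["end_datetime"] = datetime.now().isoformat()
        # Totals now land under the section matching the session mode.
        section = "evaluation" if is_eval else "learning"
        metadata_dict[section]["total_episodes"] = episodes
        metadata_dict[section]["total_time_steps"] = steps

        with open(filepath, "w") as file:
            json.dump(metadata_dict, file)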
@@ -218,10 +217,11 @@ class AgentSessionABC(ABC):

 :param kwargs: Any agent-specific key-word args to be passed.
 """
 self._env.set_as_eval() # noqa
-self.is_eval = True
-self._plot_av_reward_per_episode(learning_session=False)
-_LOGGER.info("Finished evaluation")
+if self._can_evaluate:
+    self._plot_av_reward_per_episode(learning_session=False)
+    self._update_session_metadata_file()
+self.is_eval = True
+_LOGGER.info("Finished evaluation")

 @abstractmethod
 def _get_latest_checkpoint(self):
@@ -375,8 +375,8 @@ class HardCodedAgentSessionABC(AgentSessionABC):
 self._env.set_as_eval() # noqa
 self.is_eval = True

-time_steps = self._training_config.num_steps
-episodes = self._training_config.num_episodes
+time_steps = self._training_config.num_eval_steps
+episodes = self._training_config.num_eval_episodes

 obs = self._env.reset()
 for episode in range(episodes):
@@ -395,6 +395,7 @@ class HardCodedAgentSessionABC(AgentSessionABC):
 time.sleep(self._training_config.time_delay / 1000)
 obs = self._env.reset()
 self._env.close()
+super().evaluate()

 @classmethod
 def load(cls):
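The hard-coded agent's evaluation loop now takes its bounds from the dedicated eval settings. A rough standalone sketch of that loop shape, assuming the gym 0.21-style step API the diff itself uses (the driver function and the get_action hook are hypothetical):

    def run_hard_coded_evaluation(env, agent, config) -> None:
        # Hypothetical driver mirroring the loop shape in the hunk above.
        obs = env.reset()
        for _episode in range(config.num_eval_episodes):
            for _step in range(config.num_eval_steps):
                action = agent.get_action(obs)  # hypothetical fixed-policy hook
                obs, reward, done, info = env.step(action)
                if done:
                    break
            obs = env.reset()
        env.close()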
@@ -97,8 +97,12 @@ class RLlibAgent(AgentSessionABC):
 metadata_dict = json.load(file)

 metadata_dict["end_datetime"] = datetime.now().isoformat()
-metadata_dict["total_episodes"] = self._current_result["episodes_total"]
-metadata_dict["total_time_steps"] = self._current_result["timesteps_total"]
+if not self.is_eval:
+    metadata_dict["learning"]["total_episodes"] = self._current_result["episodes_total"] # noqa
+    metadata_dict["learning"]["total_time_steps"] = self._current_result["timesteps_total"] # noqa
+else:
+    metadata_dict["evaluation"]["total_episodes"] = self._current_result["episodes_total"] # noqa
+    metadata_dict["evaluation"]["total_time_steps"] = self._current_result["timesteps_total"] # noqa

 filepath = self.session_path / "session_metadata.json"
 _LOGGER.debug(f"Updating Session Metadata file: {filepath}")
@@ -122,13 +126,13 @@ class RLlibAgent(AgentSessionABC):
 )
 self._agent_config.seed = self._training_config.seed

-self._agent_config.training(train_batch_size=self._training_config.num_steps)
+self._agent_config.training(train_batch_size=self._training_config.num_train_steps)
 self._agent_config.framework(framework="tf")

 self._agent_config.rollouts(
     num_rollout_workers=1,
     num_envs_per_worker=1,
-    horizon=self._training_config.num_steps,
+    horizon=self._training_config.num_train_steps,
 )
 self._agent: Algorithm = self._agent_config.build(logger_creator=_custom_log_creator(self.learning_path))
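For context, RLlib's AlgorithmConfig chains mutating calls and then builds the Algorithm. A minimal standalone sketch against a built-in env, assuming a Ray release where rollouts() still accepts horizon, as the diff implies (the parameter was removed in later Ray versions):

    from ray.rllib.algorithms.ppo import PPOConfig

    num_train_steps = 256  # mirrors training_config.num_train_steps

    config = PPOConfig()
    config.environment(env="CartPole-v1")  # stand-in for PriMAITE's env
    config.training(train_batch_size=num_train_steps)
    config.framework(framework="tf")
    config.rollouts(
        num_rollout_workers=1,
        num_envs_per_worker=1,
        horizon=num_train_steps,  # caps episode length
    )

    algo = config.build()  # returns an RLlib Algorithm
    result = algo.train()  # one iteration; exposes result["episodes_total"], result["timesteps_total"]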
@@ -150,8 +154,8 @@ class RLlibAgent(AgentSessionABC):

 :param kwargs: Any agent-specific key-word args to be passed.
 """
-time_steps = self._training_config.num_steps
-episodes = self._training_config.num_episodes
+time_steps = self._training_config.num_train_steps
+episodes = self._training_config.num_train_episodes

 _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...")
 for i in range(episodes):
@@ -162,9 +166,6 @@ class RLlibAgent(AgentSessionABC):

 super().learn()

-# save agent
-self.save()
-
 def evaluate(
     self,
     **kwargs,
@@ -65,11 +65,12 @@ class SB3Agent(AgentSessionABC):
 session_path=self.session_path,
 timestamp_str=self.timestamp_str,
 )

 self._agent = self._agent_class(
     PPOMlp,
     self._env,
     verbose=self.sb3_output_verbose_level,
-    n_steps=self._training_config.num_steps,
+    n_steps=self._training_config.num_train_steps,
     tensorboard_log=str(self._tensorboard_log_path),
     seed=self._training_config.seed,
 )
@@ -97,14 +98,14 @@ class SB3Agent(AgentSessionABC):

 :param kwargs: Any agent-specific key-word args to be passed.
 """
-time_steps = self._training_config.num_steps
-episodes = self._training_config.num_episodes
+time_steps = self._training_config.num_train_steps
+episodes = self._training_config.num_train_episodes
 self.is_eval = False
 _LOGGER.info(f"Beginning learning for {episodes} episodes @" f" {time_steps} time steps...")
 for i in range(episodes):
     self._agent.learn(total_timesteps=time_steps)
     self._save_checkpoint()
     self._env.reset()
+self._env._write_av_reward_per_episode() # noqa
 self.save()
 self._env.close()
 super().learn()
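The learn() loop above drives Stable-Baselines3 once per episode. A self-contained sketch of the same shape against a toy env (PPOMlp in the diff appears to be PriMAITE's MLP policy argument to PPO; the stand-ins below are illustrative):

    import gym
    from stable_baselines3 import PPO

    num_train_steps = 256    # mirrors training_config.num_train_steps
    num_train_episodes = 10  # mirrors training_config.num_train_episodes

    env = gym.make("CartPole-v1")  # stand-in for the Primaite env
    agent = PPO("MlpPolicy", env, verbose=0, n_steps=num_train_steps, seed=42)

    for _episode in range(num_train_episodes):
        # Each pass collects and trains on one episode's worth of time steps.
        agent.learn(total_timesteps=num_train_steps)
        env.reset()
    env.close()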
@@ -121,8 +122,8 @@ class SB3Agent(AgentSessionABC):

 :param kwargs: Any agent-specific key-word args to be passed.
 """
-time_steps = self._training_config.num_steps
-episodes = self._training_config.num_episodes
+time_steps = self._training_config.num_eval_steps
+episodes = self._training_config.num_eval_episodes
 self._env.set_as_eval()
 self.is_eval = True
 if self._training_config.deterministic:
@@ -140,7 +141,7 @@ class SB3Agent(AgentSessionABC):
 if isinstance(action, np.ndarray):
     action = np.int64(action)
 obs, rewards, done, info = self._env.step(action)
-self._env.reset()
+self._env._write_av_reward_per_episode() # noqa
 self._env.close()
 super().evaluate()
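Evaluation relies on SB3's predict() with the deterministic flag from the training config. A standalone sketch of that loop, including the int64 unwrap of array-shaped discrete actions shown in the hunk (toy env; the budgets mirror the new eval defaults):

    import gym
    import numpy as np
    from stable_baselines3 import PPO

    env = gym.make("CartPole-v1")  # stand-in environment
    agent = PPO("MlpPolicy", env, verbose=0)
    deterministic = True  # mirrors training_config.deterministic

    obs = env.reset()
    for _episode in range(1):     # mirrors num_eval_episodes
        for _step in range(256):  # mirrors num_eval_steps
            action, _state = agent.predict(obs, deterministic=deterministic)
            if isinstance(action, np.ndarray):
                action = np.int64(action)  # unwrap 0-d action arrays
            obs, reward, done, info = env.step(action)
            if done:
                break
        obs = env.reset()
    env.close()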
@@ -59,11 +59,19 @@ observation_space:
 - name: NODE_LINK_TABLE
 # - name: NODE_STATUSES
 # - name: LINK_TRAFFIC_LEVELS
-# Number of episodes to run per session
-num_episodes: 10
-
-# Number of time_steps per episode
-num_steps: 256
+# Number of episodes for training to run per session
+num_train_episodes: 10
+
+# Number of time_steps for training per episode
+num_train_steps: 256
+
+# Number of episodes for evaluation to run per session
+num_eval_episodes: 1
+
+# Number of time_steps for evaluation per episode
+num_eval_steps: 256

 # Sets how often the agent will save a checkpoint (every n time episodes).
 # Set to 0 if no checkpoints are required. Default is 10
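A quick way to see the four new keys in use: load the YAML and pick the budget for the requested session type (the yaml usage and file path are illustrative; PriMAITE has its own config loader):

    import yaml

    with open("training_config_main.yaml") as f:  # hypothetical path
        cfg = yaml.safe_load(f)

    session = "train"  # or "eval"
    episodes = cfg[f"num_{session}_episodes"]
    steps = cfg[f"num_{session}_steps"]
    print(f"{episodes} episodes @ {steps} steps")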
@@ -61,11 +61,17 @@ class TrainingConfig:
 action_type: ActionType = ActionType.ANY
 "The ActionType to use"

-num_episodes: int = 10
-"The number of episodes to train over"
+num_train_episodes: int = 10
+"The number of episodes to train over during a training session"

-num_steps: int = 256
-"The number of steps in an episode"
+num_train_steps: int = 256
+"The number of steps in an episode during a training session"

+num_eval_episodes: int = 1
+"The number of episodes to run during an evaluation session"

+num_eval_steps: int = 256
+"The number of steps in an episode during an evaluation session"

 checkpoint_every_n_episodes: int = 5
 "The agent will save a checkpoint every n episodes"
@@ -249,8 +255,17 @@ class TrainingConfig:
 tc += f"{self.hard_coded_agent_view}, "
 tc += f"{self.action_type}, "
 tc += f"observation_space={self.observation_space}, "
-tc += f"{self.num_episodes} episodes @ "
-tc += f"{self.num_steps} steps"
+if self.session_type is SessionType.TRAIN:
+    tc += f"{self.num_train_episodes} episodes @ "
+    tc += f"{self.num_train_steps} steps"
+elif self.session_type is SessionType.EVAL:
+    tc += f"{self.num_eval_episodes} episodes @ "
+    tc += f"{self.num_eval_steps} steps"
+else:
+    tc += f"Training: {self.num_train_episodes} episodes @ "
+    tc += f"{self.num_train_steps} steps"
+    tc += f"Evaluation: {self.num_eval_episodes} episodes @ "
+    tc += f"{self.num_eval_steps} steps"
 return tc
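Worked example: with the defaults above, the combined (else) branch, using the train values for the Training half as the surrounding logic implies, appends "Training: 10 episodes @ 256 stepsEvaluation: 1 episodes @ 256 steps" — the two halves concatenate directly, so any separator has to live inside the f-strings themselves.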
@@ -298,24 +313,27 @@ def convert_legacy_training_config_dict(
 agent_framework: AgentFramework = AgentFramework.SB3,
 agent_identifier: AgentIdentifier = AgentIdentifier.PPO,
 action_type: ActionType = ActionType.ANY,
-num_steps: int = 256,
+num_train_steps: int = 256,
 ) -> Dict[str, Any]:
 """
 Convert a legacy training config dict to the new format.

 :param legacy_config_dict: A legacy training config dict.
-:param agent_framework: The agent framework to use as legacy training configs don't have agent_framework values.
-:param agent_identifier: The red agent identifier to use as legacy training configs don't have agent_identifier
-    values.
-:param action_type: The action space type to set as legacy training configs don't have action_type values.
-:param num_steps: The number of steps to set as legacy training configs don't have num_steps values.
+:param agent_framework: The agent framework to use as legacy training
+    configs don't have agent_framework values.
+:param agent_identifier: The red agent identifier to use as legacy
+    training configs don't have agent_identifier values.
+:param action_type: The action space type to set as legacy training configs
+    don't have action_type values.
+:param num_train_steps: The number of steps to set as legacy training configs
+    don't have num_train_steps values.
 :return: The converted training config dict.
 """
 config_dict = {
     "agent_framework": agent_framework.name,
     "agent_identifier": agent_identifier.name,
     "action_type": action_type.name,
-    "num_steps": num_steps,
+    "num_train_steps": num_train_steps,
     "sb3_output_verbose_level": SB3OutputVerboseLevel.INFO.name,
 }
 session_type_map = {"TRAINING": "TRAIN", "EVALUATION": "EVAL"}
@@ -336,7 +354,8 @@ def _get_new_key_from_legacy(legacy_key: str) -> str:
 """
 key_mapping = {
     "agentIdentifier": None,
-    "numEpisodes": "num_episodes",
+    "numEpisodes": "num_train_episodes",
+    "numSteps": "num_train_steps",
     "timeDelay": "time_delay",
     "configFilename": None,
     "sessionType": "session_type",
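The mapping drives a straightforward key-by-key conversion: legacy camelCase keys are renamed, and keys mapped to None are dropped. A minimal runnable sketch of that loop (the helper is hypothetical, not the project's exact implementation):

    from typing import Any, Dict, Optional

    key_mapping: Dict[str, Optional[str]] = {
        "agentIdentifier": None,  # dropped: superseded by agent_identifier
        "numEpisodes": "num_train_episodes",
        "numSteps": "num_train_steps",
        "timeDelay": "time_delay",
        "configFilename": None,   # dropped: no longer needed
        "sessionType": "session_type",
    }

    def convert_legacy_keys(legacy: Dict[str, Any]) -> Dict[str, Any]:
        # Rename mapped keys; silently drop keys mapped to None.
        converted = {}
        for old_key, value in legacy.items():
            new_key = key_mapping.get(old_key)
            if new_key is not None:
                converted[new_key] = value
        return converted

    print(convert_legacy_keys({"numEpisodes": 10, "numSteps": 256, "configFilename": "x.yaml"}))
    # -> {'num_train_episodes': 10, 'num_train_steps': 256}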
@@ -84,7 +84,12 @@ class Primaite(Env):
 _LOGGER.info(f"Using: {str(self.training_config)}")

 # Number of steps in an episode
-self.episode_steps = self.training_config.num_steps
+if self.training_config.session_type == SessionType.TRAIN:
+    self.episode_steps = self.training_config.num_train_steps
+elif self.training_config.session_type == SessionType.EVAL:
+    self.episode_steps = self.training_config.num_eval_steps
+else:
+    self.episode_steps = self.training_config.num_train_steps

 super(Primaite, self).__init__()
@@ -259,6 +264,12 @@ class Primaite(Env):
 self.episode_count = 0
 self.step_count = 0
+self.total_step_count = 0
+self.episode_steps = self.training_config.num_eval_steps

+def _write_av_reward_per_episode(self):
+    if self.actual_episode_count > 0:
+        csv_data = self.actual_episode_count, self.average_reward
+        self.episode_av_reward_writer.write(csv_data)

 def reset(self):
     """
@@ -267,10 +278,7 @@ class Primaite(Env):
 Returns:
     Environment observation space (reset)
 """
-if self.actual_episode_count > 0:
-    csv_data = self.actual_episode_count, self.average_reward
-    self.episode_av_reward_writer.write(csv_data)
-
+self._write_av_reward_per_episode()
 self.episode_count += 1

 # Don't need to reset links, as they are cleared and recalculated every
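The inline CSV write is thus extracted from reset() into _write_av_reward_per_episode(), so reset() and the session teardown paths share one code path. A compact sketch of the resulting pattern, with standalone stand-ins for the writer and counters:

    class EpisodeRewardTracker:
        """Stand-in illustrating the helper extracted from Primaite.reset()."""

        def __init__(self, writer):
            self.episode_count = 0
            self.actual_episode_count = 0
            self.average_reward = 0.0
            self.episode_av_reward_writer = writer

        def _write_av_reward_per_episode(self):
            # One shared code path; previously this logic lived inline in reset().
            if self.actual_episode_count > 0:
                csv_data = self.actual_episode_count, self.average_reward
                self.episode_av_reward_writer.write(csv_data)

        def reset(self):
            self._write_av_reward_per_episode()
            self.episode_count += 1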
@@ -90,7 +90,6 @@ def calculate_reward_function(
 f"Penalty of {ier_reward} was NOT applied."
 )
-)

 return reward_value
@@ -15,5 +15,6 @@ def av_rewards_dict(av_rewards_csv_file: Union[str, Path]) -> Dict[int, float]:
 :param av_rewards_csv_file: The average rewards per episode csv file path.
 :return: The average rewards per episode csv as a dict.
 """
-d = pl.read_csv(av_rewards_csv_file).to_dict()
-return {v: d["Average Reward"][i] for i, v in enumerate(d["Episode"])}
+df = pl.read_csv(av_rewards_csv_file).to_dict()
+
+return {v: df["Average Reward"][i] for i, v in enumerate(df["Episode"])}
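For reference, a runnable example of what av_rewards_dict produces, assuming the CSV has Episode and Average Reward columns as the comprehension implies (the file path is hypothetical):

    from pathlib import Path
    import polars as pl

    csv_path = Path("av_rewards.csv")  # hypothetical file
    csv_path.write_text("Episode,Average Reward\n1,0.25\n2,0.75\n")

    df = pl.read_csv(csv_path).to_dict()
    # to_dict() yields {column_name: Series}; zip episode numbers to rewards.
    print({v: df["Average Reward"][i] for i, v in enumerate(df["Episode"])})
    # -> {1: 0.25, 2: 0.75}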