diff --git a/src/primaite/config/training_config.py b/src/primaite/config/training_config.py index bd73f65b..a89d8c4b 100644 --- a/src/primaite/config/training_config.py +++ b/src/primaite/config/training_config.py @@ -94,64 +94,64 @@ class TrainingConfig: # Reward values # Generic - all_ok: int = 0 + all_ok: float = 0 # Node Hardware State - off_should_be_on: int = -10 - off_should_be_resetting: int = -5 - on_should_be_off: int = -2 - on_should_be_resetting: int = -5 - resetting_should_be_on: int = -5 - resetting_should_be_off: int = -2 - resetting: int = -3 + off_should_be_on: float = -0.001 + off_should_be_resetting: float = -0.0005 + on_should_be_off: float = -0.0002 + on_should_be_resetting: float = -0.0005 + resetting_should_be_on: float = -0.0005 + resetting_should_be_off: float = -0.0002 + resetting: float = -0.0003 # Node Software or Service State - good_should_be_patching: int = 2 - good_should_be_compromised: int = 5 - good_should_be_overwhelmed: int = 5 - patching_should_be_good: int = -5 - patching_should_be_compromised: int = 2 - patching_should_be_overwhelmed: int = 2 - patching: int = -3 - compromised_should_be_good: int = -20 - compromised_should_be_patching: int = -20 - compromised_should_be_overwhelmed: int = -20 - compromised: int = -20 - overwhelmed_should_be_good: int = -20 - overwhelmed_should_be_patching: int = -20 - overwhelmed_should_be_compromised: int = -20 - overwhelmed: int = -20 + good_should_be_patching: float = 0.0002 + good_should_be_compromised: float = 0.0005 + good_should_be_overwhelmed: float = 0.0005 + patching_should_be_good: float = -0.0005 + patching_should_be_compromised: float = 0.0002 + patching_should_be_overwhelmed: float = 0.0002 + patching: float = -0.0003 + compromised_should_be_good: float = -0.002 + compromised_should_be_patching: float = -0.002 + compromised_should_be_overwhelmed: float = -0.002 + compromised: float = -0.002 + overwhelmed_should_be_good: float = -0.002 + overwhelmed_should_be_patching: float = -0.002 + overwhelmed_should_be_compromised: float = -0.002 + overwhelmed: float = -0.002 # Node File System State - good_should_be_repairing: int = 2 - good_should_be_restoring: int = 2 - good_should_be_corrupt: int = 5 - good_should_be_destroyed: int = 10 - repairing_should_be_good: int = -5 - repairing_should_be_restoring: int = 2 - repairing_should_be_corrupt: int = 2 - repairing_should_be_destroyed: int = 0 - repairing: int = -3 - restoring_should_be_good: int = -10 - restoring_should_be_repairing: int = -2 - restoring_should_be_corrupt: int = 1 - restoring_should_be_destroyed: int = 2 - restoring: int = -6 - corrupt_should_be_good: int = -10 - corrupt_should_be_repairing: int = -10 - corrupt_should_be_restoring: int = -10 - corrupt_should_be_destroyed: int = 2 - corrupt: int = -10 - destroyed_should_be_good: int = -20 - destroyed_should_be_repairing: int = -20 - destroyed_should_be_restoring: int = -20 - destroyed_should_be_corrupt: int = -20 - destroyed: int = -20 - scanning: int = -2 + good_should_be_repairing: float = 0.0002 + good_should_be_restoring: float = 0.0002 + good_should_be_corrupt: float = 0.0005 + good_should_be_destroyed: float = 0.001 + repairing_should_be_good: float = -0.0005 + repairing_should_be_restoring: float = 0.0002 + repairing_should_be_corrupt: float = 0.0002 + repairing_should_be_destroyed: float = 0.0000 + repairing: float = -0.0003 + restoring_should_be_good: float = -0.001 + restoring_should_be_repairing: float = -0.0002 + restoring_should_be_corrupt: float = 0.0001 + restoring_should_be_destroyed: float = 0.0002 + restoring: float = -0.0006 + corrupt_should_be_good: float = -0.001 + corrupt_should_be_repairing: float = -0.001 + corrupt_should_be_restoring: float = -0.001 + corrupt_should_be_destroyed: float = 0.0002 + corrupt: float = -0.001 + destroyed_should_be_good: float = -0.002 + destroyed_should_be_repairing: float = -0.002 + destroyed_should_be_restoring: float = -0.002 + destroyed_should_be_corrupt: float = -0.002 + destroyed: float = -0.002 + scanning: float = -0.0002 # IER status - red_ier_running: int = -5 - green_ier_blocked: int = -10 + red_ier_running: float = -0.0005 + green_ier_blocked: float = -0.001 # Patching / Reset durations os_patching_duration: int = 5 diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index 03c23f93..3a40066a 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -142,10 +142,10 @@ class Primaite(Env): self.step_info = {} # Total reward - self.total_reward = 0 + self.total_reward: float = 0 # Average reward - self.average_reward = 0 + self.average_reward: float = 0 # Episode count self.episode_count = 0 @@ -283,9 +283,9 @@ class Primaite(Env): self._create_random_red_agent() # Reset counters and totals - self.total_reward = 0 + self.total_reward = 0.0 self.step_count = 0 - self.average_reward = 0 + self.average_reward = 0.0 # Update observations space and return self.update_environent_obs() diff --git a/src/primaite/environment/reward.py b/src/primaite/environment/reward.py index 19094a18..e4353cb9 100644 --- a/src/primaite/environment/reward.py +++ b/src/primaite/environment/reward.py @@ -20,7 +20,7 @@ def calculate_reward_function( red_iers, step_count, config_values, -): +) -> float: """ Compares the states of the initial and final nodes/links to get a reward. @@ -33,7 +33,7 @@ def calculate_reward_function( step_count: current step config_values: Config values """ - reward_value = 0 + reward_value: float = 0.0 # For each node, compare hardware state, SoftwareState, service states for node_key, final_node in final_nodes.items(): @@ -94,7 +94,7 @@ def calculate_reward_function( return reward_value -def score_node_operating_state(final_node, initial_node, reference_node, config_values): +def score_node_operating_state(final_node, initial_node, reference_node, config_values) -> float: """ Calculates score relating to the hardware state of a node. @@ -104,7 +104,7 @@ def score_node_operating_state(final_node, initial_node, reference_node, config_ reference_node: The node if there had been no red or blue effect config_values: Config values """ - score = 0 + score: float = 0.0 final_node_operating_state = final_node.hardware_state reference_node_operating_state = reference_node.hardware_state @@ -143,7 +143,7 @@ def score_node_operating_state(final_node, initial_node, reference_node, config_ return score -def score_node_os_state(final_node, initial_node, reference_node, config_values): +def score_node_os_state(final_node, initial_node, reference_node, config_values) -> float: """ Calculates score relating to the Software State of a node. @@ -153,7 +153,7 @@ def score_node_os_state(final_node, initial_node, reference_node, config_values) reference_node: The node if there had been no red or blue effect config_values: Config values """ - score = 0 + score: float = 0.0 final_node_os_state = final_node.software_state reference_node_os_state = reference_node.software_state @@ -194,7 +194,7 @@ def score_node_os_state(final_node, initial_node, reference_node, config_values) return score -def score_node_service_state(final_node, initial_node, reference_node, config_values): +def score_node_service_state(final_node, initial_node, reference_node, config_values) -> float: """ Calculates score relating to the service state(s) of a node. @@ -204,7 +204,7 @@ def score_node_service_state(final_node, initial_node, reference_node, config_va reference_node: The node if there had been no red or blue effect config_values: Config values """ - score = 0 + score: float = 0.0 final_node_services: Dict[str, Service] = final_node.services reference_node_services: Dict[str, Service] = reference_node.services @@ -266,7 +266,7 @@ def score_node_service_state(final_node, initial_node, reference_node, config_va return score -def score_node_file_system(final_node, initial_node, reference_node, config_values): +def score_node_file_system(final_node, initial_node, reference_node, config_values) -> float: """ Calculates score relating to the file system state of a node. @@ -275,7 +275,7 @@ def score_node_file_system(final_node, initial_node, reference_node, config_valu initial_node: The node before red and blue agents take effect reference_node: The node if there had been no red or blue effect """ - score = 0 + score: float = 0.0 final_node_file_system_state = final_node.file_system_state_actual reference_node_file_system_state = reference_node.file_system_state_actual diff --git a/src/primaite/transactions/transaction.py b/src/primaite/transactions/transaction.py index 7db2444a..95be8115 100644 --- a/src/primaite/transactions/transaction.py +++ b/src/primaite/transactions/transaction.py @@ -31,7 +31,7 @@ class Transaction(object): "The observation space before any actions are taken" self.obs_space_post = None "The observation space after any actions are taken" - self.reward = None + self.reward: float = None "The reward value" self.action_space = None "The action space invoked by the agent"