Change reward to float and divide by 10000
This commit is contained in:
@@ -94,64 +94,64 @@ class TrainingConfig:
|
||||
|
||||
# Reward values
|
||||
# Generic
|
||||
all_ok: int = 0
|
||||
all_ok: float = 0
|
||||
|
||||
# Node Hardware State
|
||||
off_should_be_on: int = -10
|
||||
off_should_be_resetting: int = -5
|
||||
on_should_be_off: int = -2
|
||||
on_should_be_resetting: int = -5
|
||||
resetting_should_be_on: int = -5
|
||||
resetting_should_be_off: int = -2
|
||||
resetting: int = -3
|
||||
off_should_be_on: float = -0.001
|
||||
off_should_be_resetting: float = -0.0005
|
||||
on_should_be_off: float = -0.0002
|
||||
on_should_be_resetting: float = -0.0005
|
||||
resetting_should_be_on: float = -0.0005
|
||||
resetting_should_be_off: float = -0.0002
|
||||
resetting: float = -0.0003
|
||||
|
||||
# Node Software or Service State
|
||||
good_should_be_patching: int = 2
|
||||
good_should_be_compromised: int = 5
|
||||
good_should_be_overwhelmed: int = 5
|
||||
patching_should_be_good: int = -5
|
||||
patching_should_be_compromised: int = 2
|
||||
patching_should_be_overwhelmed: int = 2
|
||||
patching: int = -3
|
||||
compromised_should_be_good: int = -20
|
||||
compromised_should_be_patching: int = -20
|
||||
compromised_should_be_overwhelmed: int = -20
|
||||
compromised: int = -20
|
||||
overwhelmed_should_be_good: int = -20
|
||||
overwhelmed_should_be_patching: int = -20
|
||||
overwhelmed_should_be_compromised: int = -20
|
||||
overwhelmed: int = -20
|
||||
good_should_be_patching: float = 0.0002
|
||||
good_should_be_compromised: float = 0.0005
|
||||
good_should_be_overwhelmed: float = 0.0005
|
||||
patching_should_be_good: float = -0.0005
|
||||
patching_should_be_compromised: float = 0.0002
|
||||
patching_should_be_overwhelmed: float = 0.0002
|
||||
patching: float = -0.0003
|
||||
compromised_should_be_good: float = -0.002
|
||||
compromised_should_be_patching: float = -0.002
|
||||
compromised_should_be_overwhelmed: float = -0.002
|
||||
compromised: float = -0.002
|
||||
overwhelmed_should_be_good: float = -0.002
|
||||
overwhelmed_should_be_patching: float = -0.002
|
||||
overwhelmed_should_be_compromised: float = -0.002
|
||||
overwhelmed: float = -0.002
|
||||
|
||||
# Node File System State
|
||||
good_should_be_repairing: int = 2
|
||||
good_should_be_restoring: int = 2
|
||||
good_should_be_corrupt: int = 5
|
||||
good_should_be_destroyed: int = 10
|
||||
repairing_should_be_good: int = -5
|
||||
repairing_should_be_restoring: int = 2
|
||||
repairing_should_be_corrupt: int = 2
|
||||
repairing_should_be_destroyed: int = 0
|
||||
repairing: int = -3
|
||||
restoring_should_be_good: int = -10
|
||||
restoring_should_be_repairing: int = -2
|
||||
restoring_should_be_corrupt: int = 1
|
||||
restoring_should_be_destroyed: int = 2
|
||||
restoring: int = -6
|
||||
corrupt_should_be_good: int = -10
|
||||
corrupt_should_be_repairing: int = -10
|
||||
corrupt_should_be_restoring: int = -10
|
||||
corrupt_should_be_destroyed: int = 2
|
||||
corrupt: int = -10
|
||||
destroyed_should_be_good: int = -20
|
||||
destroyed_should_be_repairing: int = -20
|
||||
destroyed_should_be_restoring: int = -20
|
||||
destroyed_should_be_corrupt: int = -20
|
||||
destroyed: int = -20
|
||||
scanning: int = -2
|
||||
good_should_be_repairing: float = 0.0002
|
||||
good_should_be_restoring: float = 0.0002
|
||||
good_should_be_corrupt: float = 0.0005
|
||||
good_should_be_destroyed: float = 0.001
|
||||
repairing_should_be_good: float = -0.0005
|
||||
repairing_should_be_restoring: float = 0.0002
|
||||
repairing_should_be_corrupt: float = 0.0002
|
||||
repairing_should_be_destroyed: float = 0.0000
|
||||
repairing: float = -0.0003
|
||||
restoring_should_be_good: float = -0.001
|
||||
restoring_should_be_repairing: float = -0.0002
|
||||
restoring_should_be_corrupt: float = 0.0001
|
||||
restoring_should_be_destroyed: float = 0.0002
|
||||
restoring: float = -0.0006
|
||||
corrupt_should_be_good: float = -0.001
|
||||
corrupt_should_be_repairing: float = -0.001
|
||||
corrupt_should_be_restoring: float = -0.001
|
||||
corrupt_should_be_destroyed: float = 0.0002
|
||||
corrupt: float = -0.001
|
||||
destroyed_should_be_good: float = -0.002
|
||||
destroyed_should_be_repairing: float = -0.002
|
||||
destroyed_should_be_restoring: float = -0.002
|
||||
destroyed_should_be_corrupt: float = -0.002
|
||||
destroyed: float = -0.002
|
||||
scanning: float = -0.0002
|
||||
|
||||
# IER status
|
||||
red_ier_running: int = -5
|
||||
green_ier_blocked: int = -10
|
||||
red_ier_running: float = -0.0005
|
||||
green_ier_blocked: float = -0.001
|
||||
|
||||
# Patching / Reset durations
|
||||
os_patching_duration: int = 5
|
||||
|
||||
@@ -142,10 +142,10 @@ class Primaite(Env):
|
||||
self.step_info = {}
|
||||
|
||||
# Total reward
|
||||
self.total_reward = 0
|
||||
self.total_reward: float = 0
|
||||
|
||||
# Average reward
|
||||
self.average_reward = 0
|
||||
self.average_reward: float = 0
|
||||
|
||||
# Episode count
|
||||
self.episode_count = 0
|
||||
@@ -283,9 +283,9 @@ class Primaite(Env):
|
||||
self._create_random_red_agent()
|
||||
|
||||
# Reset counters and totals
|
||||
self.total_reward = 0
|
||||
self.total_reward = 0.0
|
||||
self.step_count = 0
|
||||
self.average_reward = 0
|
||||
self.average_reward = 0.0
|
||||
|
||||
# Update observations space and return
|
||||
self.update_environent_obs()
|
||||
|
||||
@@ -20,7 +20,7 @@ def calculate_reward_function(
|
||||
red_iers,
|
||||
step_count,
|
||||
config_values,
|
||||
):
|
||||
) -> float:
|
||||
"""
|
||||
Compares the states of the initial and final nodes/links to get a reward.
|
||||
|
||||
@@ -33,7 +33,7 @@ def calculate_reward_function(
|
||||
step_count: current step
|
||||
config_values: Config values
|
||||
"""
|
||||
reward_value = 0
|
||||
reward_value: float = 0.0
|
||||
|
||||
# For each node, compare hardware state, SoftwareState, service states
|
||||
for node_key, final_node in final_nodes.items():
|
||||
@@ -94,7 +94,7 @@ def calculate_reward_function(
|
||||
return reward_value
|
||||
|
||||
|
||||
def score_node_operating_state(final_node, initial_node, reference_node, config_values):
|
||||
def score_node_operating_state(final_node, initial_node, reference_node, config_values) -> float:
|
||||
"""
|
||||
Calculates score relating to the hardware state of a node.
|
||||
|
||||
@@ -104,7 +104,7 @@ def score_node_operating_state(final_node, initial_node, reference_node, config_
|
||||
reference_node: The node if there had been no red or blue effect
|
||||
config_values: Config values
|
||||
"""
|
||||
score = 0
|
||||
score: float = 0.0
|
||||
final_node_operating_state = final_node.hardware_state
|
||||
reference_node_operating_state = reference_node.hardware_state
|
||||
|
||||
@@ -143,7 +143,7 @@ def score_node_operating_state(final_node, initial_node, reference_node, config_
|
||||
return score
|
||||
|
||||
|
||||
def score_node_os_state(final_node, initial_node, reference_node, config_values):
|
||||
def score_node_os_state(final_node, initial_node, reference_node, config_values) -> float:
|
||||
"""
|
||||
Calculates score relating to the Software State of a node.
|
||||
|
||||
@@ -153,7 +153,7 @@ def score_node_os_state(final_node, initial_node, reference_node, config_values)
|
||||
reference_node: The node if there had been no red or blue effect
|
||||
config_values: Config values
|
||||
"""
|
||||
score = 0
|
||||
score: float = 0.0
|
||||
final_node_os_state = final_node.software_state
|
||||
reference_node_os_state = reference_node.software_state
|
||||
|
||||
@@ -194,7 +194,7 @@ def score_node_os_state(final_node, initial_node, reference_node, config_values)
|
||||
return score
|
||||
|
||||
|
||||
def score_node_service_state(final_node, initial_node, reference_node, config_values):
|
||||
def score_node_service_state(final_node, initial_node, reference_node, config_values) -> float:
|
||||
"""
|
||||
Calculates score relating to the service state(s) of a node.
|
||||
|
||||
@@ -204,7 +204,7 @@ def score_node_service_state(final_node, initial_node, reference_node, config_va
|
||||
reference_node: The node if there had been no red or blue effect
|
||||
config_values: Config values
|
||||
"""
|
||||
score = 0
|
||||
score: float = 0.0
|
||||
final_node_services: Dict[str, Service] = final_node.services
|
||||
reference_node_services: Dict[str, Service] = reference_node.services
|
||||
|
||||
@@ -266,7 +266,7 @@ def score_node_service_state(final_node, initial_node, reference_node, config_va
|
||||
return score
|
||||
|
||||
|
||||
def score_node_file_system(final_node, initial_node, reference_node, config_values):
|
||||
def score_node_file_system(final_node, initial_node, reference_node, config_values) -> float:
|
||||
"""
|
||||
Calculates score relating to the file system state of a node.
|
||||
|
||||
@@ -275,7 +275,7 @@ def score_node_file_system(final_node, initial_node, reference_node, config_valu
|
||||
initial_node: The node before red and blue agents take effect
|
||||
reference_node: The node if there had been no red or blue effect
|
||||
"""
|
||||
score = 0
|
||||
score: float = 0.0
|
||||
final_node_file_system_state = final_node.file_system_state_actual
|
||||
reference_node_file_system_state = reference_node.file_system_state_actual
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ class Transaction(object):
|
||||
"The observation space before any actions are taken"
|
||||
self.obs_space_post = None
|
||||
"The observation space after any actions are taken"
|
||||
self.reward = None
|
||||
self.reward: float = None
|
||||
"The reward value"
|
||||
self.action_space = None
|
||||
"The action space invoked by the agent"
|
||||
|
||||
Reference in New Issue
Block a user