1566 - added correct num_train_episodes etc values to configs, fixed test_reward.py
This commit is contained in:
@@ -377,6 +377,7 @@ class HardCodedAgentSessionABC(AgentSessionABC):
|
||||
time.sleep(self._training_config.time_delay / 1000)
|
||||
obs = self._env.reset()
|
||||
self._env.close()
|
||||
super().evaluate()
|
||||
|
||||
@classmethod
|
||||
def load(cls):
|
||||
|
||||
@@ -60,10 +60,16 @@ observation_space:
|
||||
# - name: NODE_STATUSES
|
||||
# - name: LINK_TRAFFIC_LEVELS
|
||||
# Number of episodes to run per session
|
||||
num_episodes: 10
|
||||
num_train_episodes: 10
|
||||
|
||||
# Number of time_steps per episode
|
||||
num_steps: 256
|
||||
num_train_steps: 256
|
||||
|
||||
# Number of episodes for evaluation to run per session
|
||||
num_eval_episodes: 10
|
||||
|
||||
# Number of time_steps for evaluation per episode
|
||||
num_eval_steps: 256
|
||||
|
||||
# Sets how often the agent will save a checkpoint (every n episodes).
|
||||
# Set to 0 if no checkpoints are required. Default is 10
|
||||
|
||||
@@ -60,10 +60,16 @@ observation_space:
|
||||
# - name: NODE_STATUSES
|
||||
# - name: LINK_TRAFFIC_LEVELS
|
||||
# Number of episodes to run per session
|
||||
num_episodes: 10
|
||||
num_train_episodes: 10
|
||||
|
||||
# Number of time_steps per episode
|
||||
num_steps: 256
|
||||
num_train_steps: 256
|
||||
|
||||
# Number of episodes for evaluation to run per session
|
||||
num_eval_episodes: 1
|
||||
|
||||
# Number of time_steps for evaluation per episode
|
||||
num_eval_steps: 256
|
||||
|
||||
# Sets how often the agent will save a checkpoint (every n episodes).
|
||||
# Set to 0 if no checkpoints are required. Default is 10
|
||||
|
||||
@@ -23,16 +23,11 @@ agent_identifier: RANDOM
|
||||
# "ANY" node and acl actions
|
||||
action_type: ANY
|
||||
# Number of episodes for training to run per session
|
||||
num_train_episodes: 10
|
||||
num_train_episodes: 1
|
||||
|
||||
# Number of time_steps for training per episode
|
||||
num_train_steps: 256
|
||||
num_train_steps: 15
|
||||
|
||||
# Number of episodes for evaluation to run per session
|
||||
num_eval_episodes: 10
|
||||
|
||||
# Number of time_steps for evaluation per episode
|
||||
num_eval_steps: 256
|
||||
# Time delay between steps (for generic agents)
|
||||
time_delay: 1
|
||||
# Type of session to be run (TRAINING or EVALUATION)
|
||||
|
||||
@@ -32,14 +32,6 @@
|
||||
- name: ftp
|
||||
port: '21'
|
||||
state: COMPROMISED
|
||||
- item_type: POSITION
|
||||
positions:
|
||||
- node: '1'
|
||||
x_pos: 309
|
||||
y_pos: 78
|
||||
- node: '2'
|
||||
x_pos: 200
|
||||
y_pos: 78
|
||||
- item_type: RED_IER
|
||||
id: '3'
|
||||
start_step: 2
|
||||
|
||||
@@ -29,16 +29,16 @@ random_red_agent: True
|
||||
# "ANY" node and acl actions
|
||||
action_type: NODE
|
||||
# Number of episodes for training to run per session
|
||||
num_train_episodes: 10
|
||||
num_train_episodes: 2
|
||||
|
||||
# Number of time_steps for training per episode
|
||||
num_train_steps: 256
|
||||
num_train_steps: 15
|
||||
|
||||
# Number of episodes for evaluation to run per session
|
||||
num_eval_episodes: 10
|
||||
num_eval_episodes: 2
|
||||
|
||||
# Number of time_steps for evaluation per episode
|
||||
num_eval_steps: 256
|
||||
num_eval_steps: 15
|
||||
# Time delay between steps (for generic agents)
|
||||
time_delay: 1
|
||||
|
||||
|
||||
@@ -47,6 +47,6 @@ def test_rewards_are_being_penalised_at_each_step_function(
|
||||
Average Reward: -8 (-120 / 15)
|
||||
"""
|
||||
with temp_primaite_session as session:
|
||||
session.close()
|
||||
session.evaluate()
|
||||
ev_rewards = session.eval_av_reward_per_episode_csv()
|
||||
assert ev_rewards[1] == -8.0
|
||||
|
||||
Reference in New Issue
Block a user