---
# Configuration with three top-level sections: training_config,
# io_settings and game_config.
# NOTE(review): the nesting below was reconstructed from a
# whitespace-collapsed original using key order; verify it against the
# schema the loader expects.

training_config:
  rl_framework: RLLIB_single_agent
  rl_algorithm: PPO
  seed: 333
  n_learn_episodes: 1
  n_eval_episodes: 5
  max_steps_per_episode: 128
  deterministic_eval: false
  n_agents: 1
  # Entries here are expected to match a `ref` under game_config.agents.
  # NOTE(review): no agent with ref `defender` appears in the visible part
  # of this file - confirm it is defined further down.
  agent_references:
    - defender

io_settings:
  save_checkpoints: true
  checkpoint_interval: 5

game_config:
  ports:
    - ARP
    - DNS
    - HTTP
    - POSTGRES_SERVER
  protocols:
    - ICMP
    - TCP
    - UDP

  agents:
    - ref: client_1_green_user
      team: GREEN
      type: GreenWebBrowsingAgent
      observation_space:
        type: UC2GreenObservation
      action_space:
        action_list:
          - type: DONOTHING
          # Disabled actions, kept commented out for reference:
          # - type: NODE_LOGON
          # - type: NODE_LOGOFF
          # - type: NODE_APPLICATION_EXECUTE
          #   options:
          #     execution_definition:
          #       target_address: arcd.com
        # NOTE(review): the max_* limits are placed at the options level
        # (siblings of `nodes`), not inside the node entry - confirm.
        options:
          nodes:
            - node_ref: client_2
          max_folders_per_node: 1
          max_files_per_folder: 1
          max_services_per_node: 1
          max_nics_per_node: 2
          max_acl_rules: 10
      reward_function:
        reward_components:
          - type: DUMMY
      agent_settings:
        start_step: 5
        frequency: 4
        variance: 3

    - ref: client_1_data_manipulation_red_bot
      team: RED
      type: RedDatabaseCorruptingAgent
      observation_space:
        type: UC2RedObservation
        options:
          nodes:
            - node_ref: client_1
              observations:
                - logon_status
                - operating_status
              services:
                - service_ref: data_manipulation_bot
                  # NOTE(review): this value is one plain scalar
                  # ("operating_status health_status"), unlike the list
                  # used for the node-level observations above - confirm
                  # whether a two-item list was intended.
                  observations: operating_status health_status
              # NOTE(review): `folders` is placed on the node entry
              # (sibling of `services`) - confirm.
              folders: {}
      action_space:
        action_list:
          - type: DONOTHING
          #