From 99f1f7cfc149b5e5d8a59796b5f1ada27fd0ae64 Mon Sep 17 00:00:00 2001 From: Czar Echavez Date: Thu, 6 Jul 2023 12:10:26 +0100 Subject: [PATCH] #1386: remove setting of global seed + running pre-commit checks --- src/primaite/agents/agent.py | 1 - src/primaite/agents/sb3.py | 7 ++---- src/primaite/environment/primaite_env.py | 6 ----- .../test_seeding_and_deterministic_session.py | 22 ++++++------------- 4 files changed, 9 insertions(+), 27 deletions(-) diff --git a/src/primaite/agents/agent.py b/src/primaite/agents/agent.py index 5b76c36b..4b39839a 100644 --- a/src/primaite/agents/agent.py +++ b/src/primaite/agents/agent.py @@ -248,7 +248,6 @@ class AgentSessionABC(ABC): agent.session_path = path - return agent else: diff --git a/src/primaite/agents/sb3.py b/src/primaite/agents/sb3.py index 0d031c10..9d295c6f 100644 --- a/src/primaite/agents/sb3.py +++ b/src/primaite/agents/sb3.py @@ -59,7 +59,7 @@ class SB3Agent(AgentSessionABC): verbose=self.sb3_output_verbose_level, n_steps=self._training_config.num_steps, tensorboard_log=str(self._tensorboard_log_path), - seed=self._training_config.seed + seed=self._training_config.seed, ) def _save_checkpoint(self): @@ -121,10 +121,7 @@ class SB3Agent(AgentSessionABC): obs = self._env.reset() for step in range(time_steps): - action, _states = self._agent.predict( - obs, - deterministic=self._training_config.deterministic - ) + action, _states = self._agent.predict(obs, deterministic=self._training_config.deterministic) if isinstance(action, np.ndarray): action = np.int64(action) obs, rewards, done, info = self._env.step(action) diff --git a/src/primaite/environment/primaite_env.py b/src/primaite/environment/primaite_env.py index cd2ff9e5..03c23f93 100644 --- a/src/primaite/environment/primaite_env.py +++ b/src/primaite/environment/primaite_env.py @@ -2,7 +2,6 @@ """Main environment module containing the PRIMmary AI Training Evironment (Primaite) class.""" import copy import logging -import random import uuid as uuid from pathlib import Path from random import choice, randint, sample, uniform @@ -241,11 +240,6 @@ class Primaite(Env): self.episode_av_reward_writer = SessionOutputWriter(self, transaction_writer=False, learning_session=True) self.transaction_writer = SessionOutputWriter(self, transaction_writer=True, learning_session=True) - # set the seed globally if there is one - if self.training_config.seed: - random.seed(self.training_config.seed) - np.random.seed(self.training_config.seed) - @property def actual_episode_count(self) -> int: """Shifts the episode_count by -1 for RLlib.""" diff --git a/tests/test_seeding_and_deterministic_session.py b/tests/test_seeding_and_deterministic_session.py index 0e420459..34cb43fb 100644 --- a/tests/test_seeding_and_deterministic_session.py +++ b/tests/test_seeding_and_deterministic_session.py @@ -6,15 +6,11 @@ from tests import TEST_CONFIG_ROOT @pytest.mark.parametrize( "temp_primaite_session", - [[ - TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", - dos_very_basic_config_path() - ]], + [[TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", dos_very_basic_config_path()]], indirect=True, ) def test_seeded_learning(temp_primaite_session): """Test running seeded learning produces the same output when ran twice.""" - expected_mean_reward_per_episode = { 1: -90.703125, 2: -91.15234375, @@ -25,26 +21,22 @@ def test_seeded_learning(temp_primaite_session): 7: -88.984375, 8: -88.3203125, 9: -112.79296875, - 10: -100.01953125 + 10: -100.01953125, } with temp_primaite_session as session: - assert session._training_config.seed == 67890, \ - "Expected output is based upon a agent that was trained with " \ - "seed 67890" + assert session._training_config.seed == 67890, ( + "Expected output is based upon a agent that was trained with " "seed 67890" + ) session.learn() actual_mean_reward_per_episode = session.learn_av_reward_per_episode() assert actual_mean_reward_per_episode == expected_mean_reward_per_episode -@pytest.mark.skip(reason="Inconsistent results. Needs someone with RL " - "knowledge to investigate further.") +@pytest.mark.skip(reason="Inconsistent results. Needs someone with RL " "knowledge to investigate further.") @pytest.mark.parametrize( "temp_primaite_session", - [[ - TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", - dos_very_basic_config_path() - ]], + [[TEST_CONFIG_ROOT / "ppo_seeded_training_config.yaml", dos_very_basic_config_path()]], indirect=True, ) def test_deterministic_evaluation(temp_primaite_session):