Update uc2 notebook

2024-01-25 14:43:49 +00:00
parent 73a75c497b
commit 4b98c1f630
1 changed files with 60 additions and 55 deletions
--- a/src/primaite/notebooks/uc2_demo.ipynb
+++ b/src/primaite/notebooks/uc2_demo.ipynb
@@ -345,8 +345,8 @@
     "text": [
      "/home/cade/repos/PrimAITE/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
-      "2024-01-25 11:19:29,199\tINFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n",
-      "2024-01-25 11:19:31,924\tINFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n"
+      "2024-01-25 14:43:32,056\tINFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n",
+      "2024-01-25 14:43:35,213\tINFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n"
     ]
    }
   ],
@@ -502,6 +502,9 @@
    "# create the env\n",
    "with open(example_config_path(), 'r') as f:\n",
    "    cfg = yaml.safe_load(f)\n",
+    "    # Force the attack's port-scan and data-manipulation steps to always succeed (p = 1.0) so the cells below never need rerunning.\n",
+    "    cfg['simulation']['network']['nodes'][8]['applications'][0]['options']['data_manipulation_p_of_success'] = 1.0\n",
+    "    cfg['simulation']['network']['nodes'][8]['applications'][0]['options']['port_scan_p_of_success'] = 1.0\n",
    "game = PrimaiteGame.from_config(cfg)\n",
    "env = PrimaiteGymEnv(game = game)\n",
    "# Don't flatten obs as we are not training an agent and we wish to see the dict-formatted observations\n",
@@ -515,9 +518,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "The red agent will start attacking at some point between step 20 and 30.\n",
-    "\n",
-    "The red agent has a random chance of failing its attack, so you may need run the following cell multiple times until the reward goes from 1.0 to -1.0."
+    "The red agent will start attacking at some point between steps 20 and 30. When this happens, the reward will drop from 1.0 to 0.0, and then to -1.0 once the green agent tries to access the webpage."
   ]
  },
  {
@@ -529,10 +530,10 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "step: 1, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 2, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 3, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 4, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 1, Red action: DONOTHING, Blue reward:0.5\n",
+      "step: 2, Red action: DONOTHING, Blue reward:0.5\n",
+      "step: 3, Red action: DONOTHING, Blue reward:0.5\n",
+      "step: 4, Red action: DONOTHING, Blue reward:0.5\n",
      "step: 5, Red action: DONOTHING, Blue reward:1.0\n",
      "step: 6, Red action: DONOTHING, Blue reward:1.0\n",
      "step: 7, Red action: DONOTHING, Blue reward:1.0\n",
@@ -550,20 +551,22 @@
      "step: 19, Red action: DONOTHING, Blue reward:1.0\n",
      "step: 20, Red action: DONOTHING, Blue reward:1.0\n",
      "step: 21, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 22, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 23, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 24, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 25, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 26, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 27, Red action: NODE_APPLICATION_EXECUTE, Blue reward:0.0\n",
+      "step: 22, Red action: NODE_APPLICATION_EXECUTE, Blue reward:0.0\n",
+      "step: 23, Red action: DONOTHING, Blue reward:0.0\n",
+      "step: 24, Red action: DONOTHING, Blue reward:0.0\n",
+      "step: 25, Red action: DONOTHING, Blue reward:0.0\n",
+      "step: 26, Red action: DONOTHING, Blue reward:-1.0\n",
+      "step: 27, Red action: DONOTHING, Blue reward:-1.0\n",
      "step: 28, Red action: DONOTHING, Blue reward:-1.0\n",
      "step: 29, Red action: DONOTHING, Blue reward:-1.0\n",
-      "step: 30, Red action: DONOTHING, Blue reward:-1.0\n"
+      "step: 30, Red action: DONOTHING, Blue reward:-1.0\n",
+      "step: 31, Red action: DONOTHING, Blue reward:-1.0\n",
+      "step: 32, Red action: DONOTHING, Blue reward:-1.0\n"
     ]
    }
   ],
   "source": [
-    "for step in range(30):\n",
+    "for step in range(32):\n",
    "    obs, reward, terminated, truncated, info = env.step(0)\n",
    "    print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['client_1_data_manipulation_red_bot'][0]}, Blue reward:{reward}\" )"
   ]
@@ -696,9 +699,9 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "step: 33\n",
+      "step: 35\n",
      "Red action: DONOTHING\n",
-      "Green action: DONOTHING\n",
+      "Green action: NODE_APPLICATION_EXECUTE\n",
      "Blue reward:-1.0\n"
     ]
    }
@@ -724,17 +727,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "step: 44\n",
+      "step: 36\n",
      "Red action: DONOTHING\n",
      "Green action: NODE_APPLICATION_EXECUTE\n",
-      "Blue reward:-1.0\n"
+      "Blue reward:0.0\n"
     ]
    }
   ],
@@ -755,43 +758,45 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "step: 107, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 108, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 109, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 110, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 111, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 112, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 113, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 114, Red action: NODE_APPLICATION_EXECUTE, Blue reward:1.0\n",
-      "step: 115, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 116, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 117, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 118, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 119, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 120, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 121, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 122, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 123, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 124, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 125, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 126, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 127, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 128, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 129, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 130, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 131, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 132, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 133, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 134, Red action: NODE_APPLICATION_EXECUTE, Blue reward:1.0\n",
-      "step: 135, Red action: DONOTHING, Blue reward:1.0\n",
-      "step: 136, Red action: DONOTHING, Blue reward:1.0\n"
+      "step: 37, Red action: DONOTHING, Blue reward:0.0\n",
+      "step: 38, Red action: DONOTHING, Blue reward:0.0\n",
+      "step: 39, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 40, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 41, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 42, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 43, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 44, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 45, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 46, Red action: NODE_APPLICATION_EXECUTE, Blue reward:1.0\n",
+      "step: 47, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 48, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 49, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 50, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 51, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 52, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 53, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 54, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 55, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 56, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 57, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 58, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 59, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 60, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 61, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 62, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 63, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 64, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 65, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 66, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 67, Red action: DONOTHING, Blue reward:1.0\n",
+      "step: 68, Red action: DONOTHING, Blue reward:1.0\n"
     ]
    }
   ],
@@ -823,7 +828,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
@@ -901,7 +906,7 @@
       "  'protocol': 0}}"
      ]
     },
-     "execution_count": 24,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }