Fix minor reward sharing bugs

2024-03-12 11:40:26 +00:00
parent 03ee976a2d
commit 24fdb8dc17
4 changed files with 24 additions and 24 deletions
--- a/src/primaite/notebooks/Data-Manipulation-E2E-Demonstration.ipynb
+++ b/src/primaite/notebooks/Data-Manipulation-E2E-Demonstration.ipynb
@@ -450,7 +450,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Now the reward is -1, let's have a look at blue agent's observation."
+    "Now the reward is -0.8. Let's have a look at the blue agent's observation."
   ]
  },
  {
@@ -510,9 +510,9 @@
   "source": [
    "obs, reward, terminated, truncated, info = env.step(13)  # patch the database\n",
    "print(f\"step: {env.game.step_counter}\")\n",
-    "print(f\"Red action: {info['agent_actions']['data_manipulation_attacker']['action']}\" )\n",
-    "print(f\"Green action: {info['agent_actions']['client_1_green_user']['action']}\" )\n",
-    "print(f\"Green action: {info['agent_actions']['client_2_green_user']['action']}\" )\n",
+    "print(f\"Red action: {info['agent_actions']['data_manipulation_attacker'].action}\" )\n",
+    "print(f\"Green action: {info['agent_actions']['client_1_green_user'].action}\" )\n",
+    "print(f\"Green action: {info['agent_actions']['client_2_green_user'].action}\" )\n",
    "print(f\"Blue reward:{reward}\" )"
   ]
  },
@@ -533,9 +533,9 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "obs, reward, terminated, truncated, info = env.step(0)  # patch the database\n",
+    "obs, reward, terminated, truncated, info = env.step(0)  # do nothing\n",
    "print(f\"step: {env.game.step_counter}\")\n",
-    "print(f\"Red action: {info['agent_actions']['data_manipulation_attacker']['action']}\" )\n",
+    "print(f\"Red action: {info['agent_actions']['data_manipulation_attacker'].action}\" )\n",
    "print(f\"Green action: {info['agent_actions']['client_2_green_user']}\" )\n",
    "print(f\"Green action: {info['agent_actions']['client_1_green_user']}\" )\n",
    "print(f\"Blue reward:{reward:.2f}\" )"
@@ -557,17 +557,19 @@
   "outputs": [],
   "source": [
    "env.step(13)  # Patch the database\n",
-    "print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker']['action']}, Blue reward:{reward:.2f}\" )\n",
+    "print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker'].action}, Blue reward:{reward:.2f}\" )\n",
    "\n",
    "env.step(50)  # Block client 1\n",
-    "print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker']['action']}, Blue reward:{reward:.2f}\" )\n",
+    "print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker'].action}, Blue reward:{reward:.2f}\" )\n",
    "\n",
    "env.step(51)  # Block client 2\n",
-    "print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker']['action']}, Blue reward:{reward:.2f}\" )\n",
+    "print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker'].action}, Blue reward:{reward:.2f}\" )\n",
    "\n",
-    "for step in range(30):\n",
+    "while abs(reward - 0.8) > 1e-5:\n",
    "    obs, reward, terminated, truncated, info = env.step(0)  # do nothing\n",
-    "    print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker']['action']}, Blue reward:{reward:.2f}\" )"
+    "    print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker'].action}, Blue reward:{reward:.2f}\" )\n",
+    "    if env.game.step_counter > 10000:\n",
+    "        break # make sure there's no infinite loop if something went wrong"
   ]
  },
  {
@@ -617,17 +619,19 @@
    "    if obs['NODES'][6]['NETWORK_INTERFACES'][1]['nmne']['outbound'] == 1:\n",
    "        # client 1 has NMNEs, let's block it\n",
    "        obs, reward, terminated, truncated, info = env.step(50) # block client 1\n",
+    "        print(\"blocking client 1\")\n",
    "        break\n",
    "    elif obs['NODES'][7]['NETWORK_INTERFACES'][1]['nmne']['outbound'] == 1:\n",
    "        # client 2 has NMNEs, so let's block it\n",
    "        obs, reward, terminated, truncated, info = env.step(51) # block client 2\n",
+    "        print(\"blocking client 2\")\n",
    "        break\n",
    "    if tries>100:\n",
    "        print(\"Error: NMNE never increased\")\n",
    "        break\n",
    "\n",
    "env.step(13)  # Patch the database\n",
-    "..."
+    "print()\n"
   ]
  },
  {
@@ -646,14 +650,14 @@
    "\n",
    "for step in range(40):\n",
    "    obs, reward, terminated, truncated, info = env.step(0)  # do nothing\n",
-    "    print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker']['action']}, Blue reward:{reward:.2f}\" )"
+    "    print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker'].action}, Blue reward:{reward:.2f}\" )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Reset the environment, you can rerun the other cells to verify that the attack works the same every episode."
+    "Reset the environment. You can rerun the other cells to verify that the attack works the same every episode (except that the red agent will move between `client_1` and `client_2`)."
   ]
  },
  {