Fix minor reward sharing bugs

This commit is contained in:
Marek Wolan
2024-03-12 11:40:26 +00:00
parent 03ee976a2d
commit 24fdb8dc17
4 changed files with 24 additions and 24 deletions

View File

@@ -450,7 +450,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Now the reward is -1, let's have a look at blue agent's observation."
"Now the reward is -0.8, let's have a look at blue agent's observation."
]
},
{
@@ -510,9 +510,9 @@
"source": [
"obs, reward, terminated, truncated, info = env.step(13) # patch the database\n",
"print(f\"step: {env.game.step_counter}\")\n",
"print(f\"Red action: {info['agent_actions']['data_manipulation_attacker']['action']}\" )\n",
"print(f\"Green action: {info['agent_actions']['client_1_green_user']['action']}\" )\n",
"print(f\"Green action: {info['agent_actions']['client_2_green_user']['action']}\" )\n",
"print(f\"Red action: {info['agent_actions']['data_manipulation_attacker'].action}\" )\n",
"print(f\"Green action: {info['agent_actions']['client_1_green_user'].action}\" )\n",
"print(f\"Green action: {info['agent_actions']['client_2_green_user'].action}\" )\n",
"print(f\"Blue reward:{reward}\" )"
]
},
@@ -533,9 +533,9 @@
"metadata": {},
"outputs": [],
"source": [
"obs, reward, terminated, truncated, info = env.step(0) # patch the database\n",
"obs, reward, terminated, truncated, info = env.step(0) # do nothing\n",
"print(f\"step: {env.game.step_counter}\")\n",
"print(f\"Red action: {info['agent_actions']['data_manipulation_attacker']['action']}\" )\n",
"print(f\"Red action: {info['agent_actions']['data_manipulation_attacker'].action}\" )\n",
"print(f\"Green action: {info['agent_actions']['client_2_green_user']}\" )\n",
"print(f\"Green action: {info['agent_actions']['client_1_green_user']}\" )\n",
"print(f\"Blue reward:{reward:.2f}\" )"
@@ -557,17 +557,19 @@
"outputs": [],
"source": [
"env.step(13) # Patch the database\n",
"print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker']['action']}, Blue reward:{reward:.2f}\" )\n",
"print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker'].action}, Blue reward:{reward:.2f}\" )\n",
"\n",
"env.step(50) # Block client 1\n",
"print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker']['action']}, Blue reward:{reward:.2f}\" )\n",
"print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker'].action}, Blue reward:{reward:.2f}\" )\n",
"\n",
"env.step(51) # Block client 2\n",
"print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker']['action']}, Blue reward:{reward:.2f}\" )\n",
"print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker'].action}, Blue reward:{reward:.2f}\" )\n",
"\n",
"for step in range(30):\n",
"while abs(reward - 0.8) > 1e-5:\n",
" obs, reward, terminated, truncated, info = env.step(0) # do nothing\n",
" print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker']['action']}, Blue reward:{reward:.2f}\" )"
" print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker'].action}, Blue reward:{reward:.2f}\" )\n",
" if env.game.step_counter > 10000:\n",
" break # make sure there's no infinite loop if something went wrong"
]
},
{
@@ -617,17 +619,19 @@
" if obs['NODES'][6]['NETWORK_INTERFACES'][1]['nmne']['outbound'] == 1:\n",
" # client 1 has NMNEs, let's block it\n",
" obs, reward, terminated, truncated, info = env.step(50) # block client 1\n",
" print(\"blocking client 1\")\n",
" break\n",
" elif obs['NODES'][7]['NETWORK_INTERFACES'][1]['nmne']['outbound'] == 1:\n",
" # client 2 has NMNEs, so let's block it\n",
" obs, reward, terminated, truncated, info = env.step(51) # block client 2\n",
" print(\"blocking client 2\")\n",
" break\n",
" if tries>100:\n",
" print(\"Error: NMNE never increased\")\n",
" break\n",
"\n",
"env.step(13) # Patch the database\n",
"..."
"print()\n"
]
},
{
@@ -646,14 +650,14 @@
"\n",
"for step in range(40):\n",
" obs, reward, terminated, truncated, info = env.step(0) # do nothing\n",
" print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker']['action']}, Blue reward:{reward:.2f}\" )"
" print(f\"step: {env.game.step_counter}, Red action: {info['agent_actions']['data_manipulation_attacker'].action}, Blue reward:{reward:.2f}\" )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Reset the environment, you can rerun the other cells to verify that the attack works the same every episode."
"Reset the environment, you can rerun the other cells to verify that the attack works the same every episode. (except the red agent will move between `client_1` and `client_2`.)"
]
},
{