{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2dc96ce3-b6a2-4e34-9f82-f0640c2cc38e",
   "metadata": {},
   "source": [
    "# **Usage Example of the Attacks of ADARO-RL**"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "67efd4f9-6109-46d4-a06e-3d6957a816e9",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7e3830a6",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/jovyan/Maturation/env-adaro-rl/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      " from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "# Standard library\n",
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "# Third-party\n",
    "import numpy as np\n",
    "\n",
    "# Local\n",
    "import adaro_rl\n",
    "from adaro_rl.attacks.registry import make_attack\n",
    "from adaro_rl.zoo.agent import make_agent\n",
    "from adaro_rl.zoo.environment import make_env"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "015dc1ef-68ca-4dc4-b341-ffab44b24123",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Configuration ---\n",
    "config_name = \"Enduro-v5\"\n",
    "\n",
    "# Attack settings (passed to make_attack below)\n",
    "attack_name = \"FGM_D\"\n",
    "target = \"untargeted\"\n",
    "eps = 20   # perturbation budget\n",
    "norm = 0   # norm used for the budget and for the empirical report at the end\n",
    "adversary_checkpoint = None  # path to a trained adversary; None -> attack uses the agent itself\n",
    "\n",
    "n_eval_episodes = 2\n",
    "\n",
    "device = \"cuda\"  # or \"cpu\"\n",
    "seed = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "452dd4ad",
   "metadata": {},
   "source": [
    "## Make env"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0ff6a3f2-3265-4c21-b238-96f118a5de78",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "A.L.E: Arcade Learning Environment (version 0.10.1+unknown)\n",
      "[Powered by Stella]\n"
     ]
    }
   ],
   "source": [
    "config = adaro_rl.zoo.configs[config_name]\n",
    "\n",
    "# No adversarial wrapper here: perturbations are generated explicitly\n",
    "# in the rollout loop at the end of the notebook.\n",
    "env = make_env(\n",
    "    **config.train_env_config,\n",
    "    adv_wrapper_class=None,\n",
    "    seed=seed,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e77251d3-5887-49e4-b6c2-34410c6468a6",
   "metadata": {},
   "source": [
    "## Download agent model checkpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a998beb4-7abc-4e7b-b145-33dcdf9069f7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "📥 Download the agent\n",
      "agents/Enduro-v5/model.zip already exists\n"
     ]
    }
   ],
   "source": [
    "# Fetch the pretrained agent checkpoint once; later runs reuse the local copy.\n",
    "agent_dir = Path(\"agents\") / config_name\n",
    "agent_dir.mkdir(parents=True, exist_ok=True)\n",
    "model_path = agent_dir / \"model.zip\"\n",
    "\n",
    "print(\"📥 Download the agent\")\n",
    "if model_path.is_file():\n",
    "    print(f\"{model_path} already exists\")\n",
    "else:\n",
    "    adaro_rl.zoo.download_model(config_name, local_dir=str(agent_dir))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7a522882-8130-4e41-8cc9-a61bbd49ded4",
   "metadata": {},
   "source": [
    "## Make agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7f4dec43-c82f-4b0d-817a-3eb0aef61a95",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wrapping the env in a VecTransposeImage.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/jovyan/Maturation/env-adaro-rl/lib/python3.10/site-packages/stable_baselines3/common/on_policy_algorithm.py:150: UserWarning: You are trying to run PPO on the GPU, but it is primarily intended to run on the CPU when not using a CNN policy (you are using ActorCriticPolicy which should be a MlpPolicy). See https://github.com/DLR-RM/stable-baselines3/issues/1245 for more info. You can pass `device='cpu'` or `export CUDA_VISIBLE_DEVICES=` to force using the CPU.Note: The model will train, but the GPU utilization will be poor and the training might take longer than on CPU.\n",
      " warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "def make_agent_fct():\n",
    "    \"\"\"Load the protagonist agent from its downloaded checkpoint.\"\"\"\n",
    "    return make_agent(\n",
    "        **config.agent_config,\n",
    "        env=env,\n",
    "        checkpoint=str(model_path),\n",
    "        device=device,\n",
    "        seed=seed,\n",
    "    )\n",
    "\n",
    "\n",
    "agent = make_agent_fct()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d554ebd-d5bf-41ec-815a-9137b925a27c",
   "metadata": {},
   "source": [
    "## Make adversary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "acae757d-315e-4dce-b3c7-91247d59e487",
   "metadata": {},
   "outputs": [],
   "source": [
    "if adversary_checkpoint is not None:\n",
    "    # A separately trained adversary drives the attack.\n",
    "    def make_adversary_fct():\n",
    "        \"\"\"Load the adversary agent from its own checkpoint.\"\"\"\n",
    "        return make_agent(\n",
    "            **config.adversary_config,\n",
    "            env=env,\n",
    "            checkpoint=adversary_checkpoint,\n",
    "            device=device,\n",
    "            seed=seed,\n",
    "        )\n",
    "\n",
    "    make_agent_for_attack_fct = make_adversary_fct\n",
    "else:\n",
    "    # No adversary checkpoint: the attack uses the agent's own model.\n",
    "    make_agent_for_attack_fct = make_agent_fct"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2f864baf-508e-43f7-87dd-b1f669e2b0f5",
   "metadata": {},
   "source": [
    "## Make attack"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "33584d66-1c6b-4213-abbf-35c080052353",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wrapping the env in a VecTransposeImage.\n"
     ]
    }
   ],
   "source": [
    "attack = make_attack(\n",
    "    attack_name=attack_name,\n",
    "    make_agent_fct=make_agent_for_attack_fct,\n",
    "    target=target,\n",
    "    obs_space=env.observation_space,\n",
    "    perturb_space=env.get_attr(\"observation_perturbation_space\")[0],\n",
    "    is_proportional_mask=env.get_attr(\"proportional_obs_perturbation_mask\")[0],\n",
    "    eps=eps,\n",
    "    norm=norm,\n",
    "    device=device,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aa8aa656-2f5e-4dbe-9921-3dda147d1992",
   "metadata": {},
   "source": [
    "## Run the agent in the environment with attacks perturbing the observations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "54a2020b-0f25-4ef2-a902-9108e374f2a0",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/jovyan/Maturation/new-adaro-rl/adaro_rl/attacks/base_attack.py:181: RuntimeWarning: invalid value encountered in multiply\n",
      " perturbation_map_batch *= self.flat_max_eps\n",
      "/home/jovyan/Maturation/new-adaro-rl/adaro_rl/attacks/base_attack.py:261: RuntimeWarning: invalid value encountered in cast\n",
      " return perturbed_observation_batch_float.astype(original_type)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "reward = 301.5 ± 128.5 (over 2 episodes)\n",
      "eps = 9.57 ± 5.90; minimum = 0.00; maximum = 20.00 (over 2484 perturbations)\n"
     ]
    }
   ],
   "source": [
    "episode_rewards = []\n",
    "perturbations = []\n",
    "\n",
    "for _ in range(n_eval_episodes):\n",
    "    obs = env.reset()\n",
    "    done = False\n",
    "    total_reward = 0.0\n",
    "\n",
    "    while not done:\n",
    "        # Perturb the observation, then let the agent act on the perturbed view.\n",
    "        adv_obs = attack.generate_adv_obs(obs)\n",
    "        perturbations.append(adv_obs.flatten() - obs.flatten())\n",
    "        action, _ = agent.predict(adv_obs)\n",
    "        obs, reward, done, info = env.step(action)\n",
    "        total_reward += reward\n",
    "\n",
    "    episode_rewards.append(total_reward)\n",
    "\n",
    "# Empirical perturbation size per step, measured with the same norm as the budget.\n",
    "perturbations_array = np.array(perturbations)\n",
    "empirical_eps = np.linalg.norm(perturbations_array, ord=norm, axis=1)\n",
    "\n",
    "print(f\"reward = {np.mean(episode_rewards)} ± {np.std(episode_rewards)} (over {n_eval_episodes} episodes)\")\n",
    "print(f\"eps = {np.mean(empirical_eps):.2f} ± {np.std(empirical_eps):.2f}; minimum = {np.min(empirical_eps):.2f}; maximum = {np.max(empirical_eps):.2f} (over {len(perturbations)} perturbations)\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env-adaro-rl",
   "language": "python",
   "name": "env-adaro-rl"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}