Usage example of the ADARO-RL attacks

imports

[1]:
import os
from pathlib import Path

import numpy as np
from adaro_rl.attacks.registry import make_attack

import adaro_rl
from adaro_rl.zoo.agent import make_agent
from adaro_rl.zoo.environment import make_env
/home/jovyan/Maturation/env-adaro-rl/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
[2]:
# Experiment configuration — everything a reader might want to tune.
config_name = "Enduro-v5"       # zoo config key (environment + agent)

attack_name = "FGM_D"           # attack to build via the registry
target = "untargeted"           # attack target mode
eps = 20                        # perturbation budget passed to the attack
norm = 0                        # norm order for the budget (also used by np.linalg.norm below)
adversary_checkpoint = None     # optional checkpoint for a learned adversary

n_eval_episodes = 2             # number of evaluation episodes

device = "cuda"                 # or "cpu"
seed = 0                        # seed for env and agent

make env

[3]:
# Look up the zoo configuration for this game and build the environment.
config = adaro_rl.zoo.configs[config_name]

# No adversarial wrapper here: the attack is applied manually in the
# evaluation loop further down.
env_kwargs = dict(config.train_env_config)
env = make_env(
    **env_kwargs,
    adv_wrapper_class=None,
    seed=seed,
)
A.L.E: Arcade Learning Environment (version 0.10.1+unknown)
[Powered by Stella]

Download agent model checkpoint

[4]:
# Ensure the local agent directory exists, then fetch the model checkpoint
# from the zoo unless it is already present on disk.
# NOTE: `agent_dir` stays a plain string because later cells join it with
# os.path.join; the f-string around config_name was redundant and is removed.
agent_dir = os.path.join("agents", config_name)
agent_path = Path(agent_dir)
agent_path.mkdir(parents=True, exist_ok=True)

model_path = agent_path / "model.zip"
print("📥 Download the agent")
if not model_path.is_file():
    adaro_rl.zoo.download_model(config_name, local_dir=agent_dir)
else:
    print(f"{model_path} already exists")
📥 Download the agent
agents/Enduro-v5/model.zip already exists

make agent

[5]:
# Factory for the protected agent. A `def` replaces the named lambda
# (PEP 8 E731); the callable's name and zero-argument signature are unchanged,
# so the attack cell below can still pass it around.
def make_agent_fct():
    """Build the agent from the zoo config and the downloaded checkpoint."""
    return make_agent(
        **config.agent_config,
        env=env,
        checkpoint=os.path.join(agent_dir, "model.zip"),
        device=device,
        seed=seed,
    )

agent = make_agent_fct()
Wrapping the env in a VecTransposeImage.
/home/jovyan/Maturation/env-adaro-rl/lib/python3.10/site-packages/stable_baselines3/common/on_policy_algorithm.py:150: UserWarning: You are trying to run PPO on the GPU, but it is primarily intended to run on the CPU when not using a CNN policy (you are using ActorCriticPolicy which should be a MlpPolicy). See https://github.com/DLR-RM/stable-baselines3/issues/1245 for more info. You can pass `device='cpu'` or `export CUDA_VISIBLE_DEVICES=` to force using the CPU.Note: The model will train, but the GPU utilization will be poor and the training might take longer than on CPU.
  warnings.warn(

make adversary

[6]:
# Choose which agent factory the attack will use: a dedicated adversary when a
# checkpoint is provided, otherwise the protected agent itself.
if adversary_checkpoint is not None:
    def make_adversary_fct():
        """Build the adversary agent from its own checkpoint."""
        return make_agent(
            **config.adversary_config,
            env=env,
            # os.path.join() with a single argument was a no-op; pass the
            # checkpoint path through directly.
            checkpoint=adversary_checkpoint,
            device=device,
            seed=seed,
        )

    make_agent_for_attack_fct = make_adversary_fct
else:
    make_agent_for_attack_fct = make_agent_fct

make attack

[7]:
# Pull the perturbation space and mask off the (vectorized) environment once,
# then build the attack from the registry.
obs_perturb_space = env.get_attr("observation_perturbation_space")[0]
proportional_mask = env.get_attr("proportional_obs_perturbation_mask")[0]

attack = make_attack(
    attack_name=attack_name,
    make_agent_fct=make_agent_for_attack_fct,
    target=target,
    obs_space=env.observation_space,
    perturb_space=obs_perturb_space,
    is_proportional_mask=proportional_mask,
    eps=eps,
    norm=norm,
    device=device,
)
Wrapping the env in a VecTransposeImage.

Run the agent in the environment with the attack perturbing its observations

[8]:
# Evaluate the agent under attack: at every step the observation is perturbed
# before being fed to the policy, and each perturbation is recorded so the
# empirical budget can be compared against `eps` afterwards.
episode_rewards = []
perturbations = []

for _ in range(n_eval_episodes):
    obs = env.reset()
    done = False
    episode_return = 0.0
    n_steps = 0

    while not done:
        # Perturb the current observation, then act on the perturbed version.
        adv_obs = attack.generate_adv_obs(obs)
        perturbations.append(adv_obs.flatten() - obs.flatten())
        action, _ = agent.predict(adv_obs)
        # NOTE(review): old-style 4-tuple step API — presumably a SB3 VecEnv.
        obs, reward, done, info = env.step(action)
        episode_return += reward
        n_steps += 1

    episode_rewards.append(episode_return)

# Per-step perturbation magnitude under the configured norm order
# (ord=0 counts non-zero entries).
perturbations_array = np.array(perturbations)
empirical_eps = np.linalg.norm(perturbations_array, ord=norm, axis=1)

print(f"reward = {np.mean(episode_rewards)} ± {np.std(episode_rewards)}  (over {n_eval_episodes} episodes)")
print(f"eps =  {np.mean(empirical_eps):.2f} ± {np.std(empirical_eps):.2f};  minimum = {np.min(empirical_eps):.2f};  maximum = {np.max(empirical_eps):.2f}  (over {len(perturbations)} perturbations)")
/home/jovyan/Maturation/new-adaro-rl/adaro_rl/attacks/base_attack.py:181: RuntimeWarning: invalid value encountered in multiply
  perturbation_map_batch *= self.flat_max_eps
/home/jovyan/Maturation/new-adaro-rl/adaro_rl/attacks/base_attack.py:261: RuntimeWarning: invalid value encountered in cast
  return perturbed_observation_batch_float.astype(original_type)
reward = 301.5 ± 128.5  (over 2 episodes)
eps =  9.57 ± 5.90;  minimum = 0.00;  maximum = 20.00  (over 2484 perturbations)
[ ]:

[ ]: