Coverage for adaro_rl / pipelines / test.py: 91%
35 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 07:50 +0000
1import numpy as np
2import pandas as pd
3import os
6def test(
7 config,
8 output_dir="agent",
9 checkpoint=None,
10 render=False,
11 device="cpu",
12 seed=None,
13 n_eval_episodes=None,
14):
15 """
16 Evaluate a trained reinforcement learning agent on a test environment and log performance metrics.
18 This function loads a trained agent from the specified directory, creates both a training and an
19 evaluation environment, and then evaluates the agent's performance in terms of rewards and episode
20 lengths. The results (mean and standard deviation of rewards) are printed and saved to a CSV file
21 in the agent's results directory.
23 Parameters
24 ----------
25 config : object
26 Configuration object containing necessary settings.
27 output_dir : str, optional
28 Path to the directory where the agent files and evaluation results are stored.
29 Default is "agent".
30 checkpoint : str, optional
31 Path to the trained agent checkpoint to be loaded. Default is None.
32 render : bool, optional
33 If True, the environment will be rendered during evaluation.
34 Requires proper display configuration. Default is False.
35 device : str, optional
36 Computation device, e.g., "cpu" or "cuda:0". Default is "cpu".
37 seed : int, optional
38 Random seed for reproducibility. Default is 0.
40 Returns
41 -------
42 None
43 """
45 # SAVESPACE ####################
47 os.makedirs(output_dir, exist_ok=True)
48 result_path = os.path.join(output_dir, "result.csv")
50 if n_eval_episodes is None:
51 n_eval_episodes = config.n_eval_episodes
53 # ENV ####################
55 if render:
56 env_config = config.render_env_config
57 else:
58 env_config = config.eval_env_config
59 os.environ["SDL_VIDEODRIVER"] = "dummy"
61 training_env = config.make_env(
62 env_id=env_config["env_id"],
63 n_envs=env_config["n_envs"],
64 n_frame_stack=env_config["n_frame_stack"],
65 wrapper_class=env_config["wrapper_class"],
66 adv_wrapper_class=None,
67 env_kwargs=env_config["env_kwargs"],
68 seed=seed,
69 )
71 eval_env = config.make_env(
72 env_id=env_config["env_id"],
73 n_envs=env_config["n_envs"],
74 n_frame_stack=env_config["n_frame_stack"],
75 wrapper_class=env_config["wrapper_class"],
76 adv_wrapper_class=None,
77 env_kwargs=env_config["env_kwargs"],
78 seed=seed,
79 )
81 # Agent ####################
83 agent = config.make_agent(
84 algo=config.agent_config["algo"],
85 env=training_env,
86 checkpoint=checkpoint,
87 device=device,
88 seed=seed,
89 algo_kwargs=config.agent_config["algo_kwargs"],
90 )
92 agent.eval()
94 # RUN ####################
96 rewards, lengths = config.eval_config["evaluate_policy_fct"](
97 agent,
98 eval_env,
99 n_eval_episodes=n_eval_episodes,
100 render=render,
101 deterministic=config.eval_config["deterministic_eval"],
102 return_episode_rewards=True,
103 )
105 # DISPLAY ####################
107 mean_lengths = np.mean(lengths)
108 std_lengths = np.std(lengths)
109 print()
110 print("lengths : {}".format(lengths))
111 print("mean : {}".format(mean_lengths))
112 print("std : {}".format(std_lengths))
114 mean_reward = np.mean(rewards)
115 std_reward = np.std(rewards)
116 print()
117 print("rewards : {}".format(rewards))
118 print("mean : {}".format(mean_reward))
119 print("std : {}".format(std_reward))
121 # SAVE ####################
123 if os.path.isfile(result_path):
124 df = pd.read_csv(result_path, index_col=0)
125 else:
126 df = pd.DataFrame(columns=["mean reward", "std reward"])
127 df.loc[0] = {"mean reward": mean_reward, "std reward": std_reward}
128 df = df.sort_index()
129 df.to_csv(result_path)