Coverage for adaro_rl / pipelines / test.py: 91%

35 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 07:50 +0000

1import numpy as np 

2import pandas as pd 

3import os 

4 

5 

def test(
    config,
    output_dir="agent",
    checkpoint=None,
    render=False,
    device="cpu",
    seed=None,
    n_eval_episodes=None,
):
    """
    Evaluate a trained reinforcement learning agent and log performance metrics.

    Loads a trained agent from ``checkpoint``, creates a training environment
    (required only to instantiate the agent) and an evaluation environment,
    runs the configured evaluation function, prints per-episode rewards and
    lengths with their mean/std, and saves the mean and standard deviation of
    the rewards to ``<output_dir>/result.csv``.

    Parameters
    ----------
    config : object
        Configuration object providing ``make_env``, ``make_agent``,
        ``n_eval_episodes``, ``render_env_config`` / ``eval_env_config``,
        ``agent_config`` and ``eval_config``.
    output_dir : str, optional
        Path to the directory where the agent files and evaluation results
        are stored. Default is "agent".
    checkpoint : str, optional
        Path to the trained agent checkpoint to be loaded. Default is None.
    render : bool, optional
        If True, the environment will be rendered during evaluation.
        Requires proper display configuration. Default is False.
    device : str, optional
        Computation device, e.g., "cpu" or "cuda:0". Default is "cpu".
    seed : int, optional
        Random seed for reproducibility. Default is None (no fixed seed).
    n_eval_episodes : int, optional
        Number of evaluation episodes. Default is None, in which case
        ``config.n_eval_episodes`` is used.

    Returns
    -------
    None
    """

    # SAVESPACE ####################

    os.makedirs(output_dir, exist_ok=True)
    result_path = os.path.join(output_dir, "result.csv")

    if n_eval_episodes is None:
        n_eval_episodes = config.n_eval_episodes

    # ENV ####################

    if render:
        env_config = config.render_env_config
    else:
        env_config = config.eval_env_config
        # Headless evaluation: SDL's dummy video driver avoids needing a
        # display when no rendering is requested.
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    # The agent constructor needs an environment with the same spec; a
    # separate instance is built so evaluation rollouts do not share state
    # with the one bound to the agent.
    training_env = config.make_env(
        env_id=env_config["env_id"],
        n_envs=env_config["n_envs"],
        n_frame_stack=env_config["n_frame_stack"],
        wrapper_class=env_config["wrapper_class"],
        adv_wrapper_class=None,
        env_kwargs=env_config["env_kwargs"],
        seed=seed,
    )

    eval_env = config.make_env(
        env_id=env_config["env_id"],
        n_envs=env_config["n_envs"],
        n_frame_stack=env_config["n_frame_stack"],
        wrapper_class=env_config["wrapper_class"],
        adv_wrapper_class=None,
        env_kwargs=env_config["env_kwargs"],
        seed=seed,
    )

    # Agent ####################

    agent = config.make_agent(
        algo=config.agent_config["algo"],
        env=training_env,
        checkpoint=checkpoint,
        device=device,
        seed=seed,
        algo_kwargs=config.agent_config["algo_kwargs"],
    )

    # Switch the agent to evaluation mode before rolling out episodes.
    agent.eval()

    # RUN ####################

    rewards, lengths = config.eval_config["evaluate_policy_fct"](
        agent,
        eval_env,
        n_eval_episodes=n_eval_episodes,
        render=render,
        deterministic=config.eval_config["deterministic_eval"],
        return_episode_rewards=True,
    )

    # DISPLAY ####################

    mean_lengths = np.mean(lengths)
    std_lengths = np.std(lengths)
    print()
    print("lengths : {}".format(lengths))
    print("mean : {}".format(mean_lengths))
    print("std : {}".format(std_lengths))

    mean_reward = np.mean(rewards)
    std_reward = np.std(rewards)
    print()
    print("rewards : {}".format(rewards))
    print("mean : {}".format(mean_reward))
    print("std : {}".format(std_reward))

    # SAVE ####################

    if os.path.isfile(result_path):
        df = pd.read_csv(result_path, index_col=0)
    else:
        df = pd.DataFrame(columns=["mean reward", "std reward"])
    # NOTE(review): the result is always written at index 0, so a re-run
    # overwrites the previous row rather than appending — presumably
    # intended, but then reading the existing CSV and sort_index() are
    # no-ops; confirm whether an append (e.g. df.loc[len(df)]) was meant.
    df.loc[0] = {"mean reward": mean_reward, "std reward": std_reward}
    df = df.sort_index()
    df.to_csv(result_path)


# Despite its name, this is a pipeline entry point, not a pytest test;
# prevent pytest from trying to collect it (its `config` parameter would
# otherwise be resolved as a missing fixture).
test.__test__ = False