Coverage for adaro_rl / pipelines / test.py: 91%

35 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 07:50 +0000

1import numpy as np 

2import pandas as pd 

3import os 

4 

5 

def test(
    config,
    output_dir="agent",
    checkpoint=None,
    render=False,
    device="cpu",
    seed=None,
    n_eval_episodes=None,
):
    """
    Evaluate a trained reinforcement learning agent and log performance metrics.

    Loads a trained agent from ``checkpoint``, creates a training environment
    (required only to instantiate the agent) and an evaluation environment,
    runs the configured evaluation function, prints per-episode rewards and
    lengths with their mean/std, and saves the mean and standard deviation of
    the rewards to ``<output_dir>/result.csv``.

    Parameters
    ----------
    config : object
        Configuration object providing ``make_env``, ``make_agent``,
        ``n_eval_episodes``, ``render_env_config`` / ``eval_env_config``,
        ``agent_config`` and ``eval_config``.
    output_dir : str, optional
        Path to the directory where the agent files and evaluation results
        are stored. Default is "agent".
    checkpoint : str, optional
        Path to the trained agent checkpoint to be loaded. Default is None.
    render : bool, optional
        If True, the environment will be rendered during evaluation.
        Requires proper display configuration. Default is False.
    device : str, optional
        Computation device, e.g., "cpu" or "cuda:0". Default is "cpu".
    seed : int, optional
        Random seed for reproducibility. Default is None (no fixed seed).
    n_eval_episodes : int, optional
        Number of evaluation episodes. Default is None, in which case
        ``config.n_eval_episodes`` is used.

    Returns
    -------
    None
    """

    # SAVESPACE ####################

    os.makedirs(output_dir, exist_ok=True)
    result_path = os.path.join(output_dir, "result.csv")

    if n_eval_episodes is None:
        n_eval_episodes = config.n_eval_episodes

    # ENV ####################

    if render:
        env_config = config.render_env_config
    else:
        env_config = config.eval_env_config
        # Headless evaluation: SDL's dummy video driver avoids needing a
        # display when no rendering is requested.
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    # The agent constructor needs an environment with the same spec; a
    # separate instance is built so evaluation rollouts do not share state
    # with the one bound to the agent.
    training_env = config.make_env(
        env_id=env_config["env_id"],
        n_envs=env_config["n_envs"],
        n_frame_stack=env_config["n_frame_stack"],
        wrapper_class=env_config["wrapper_class"],
        adv_wrapper_class=None,
        env_kwargs=env_config["env_kwargs"],
        seed=seed,
    )

    eval_env = config.make_env(
        env_id=env_config["env_id"],
        n_envs=env_config["n_envs"],
        n_frame_stack=env_config["n_frame_stack"],
        wrapper_class=env_config["wrapper_class"],
        adv_wrapper_class=None,
        env_kwargs=env_config["env_kwargs"],
        seed=seed,
    )

    # Agent ####################

    agent = config.make_agent(
        algo=config.agent_config["algo"],
        env=training_env,
        checkpoint=checkpoint,
        device=device,
        seed=seed,
        algo_kwargs=config.agent_config["algo_kwargs"],
    )

    # Switch the agent to evaluation mode before rolling out episodes.
    agent.eval()

    # RUN ####################

    rewards, lengths = config.eval_config["evaluate_policy_fct"](
        agent,
        eval_env,
        n_eval_episodes=n_eval_episodes,
        render=render,
        deterministic=config.eval_config["deterministic_eval"],
        return_episode_rewards=True,
    )

    # DISPLAY ####################

    mean_lengths = np.mean(lengths)
    std_lengths = np.std(lengths)
    print()
    print("lengths : {}".format(lengths))
    print("mean : {}".format(mean_lengths))
    print("std : {}".format(std_lengths))

    mean_reward = np.mean(rewards)
    std_reward = np.std(rewards)
    print()
    print("rewards : {}".format(rewards))
    print("mean : {}".format(mean_reward))
    print("std : {}".format(std_reward))

    # SAVE ####################

    if os.path.isfile(result_path):
        df = pd.read_csv(result_path, index_col=0)
    else:
        df = pd.DataFrame(columns=["mean reward", "std reward"])
    # NOTE(review): the result is always written at index 0, so a re-run
    # overwrites the previous row rather than appending — presumably
    # intended, but then reading the existing CSV and sort_index() are
    # no-ops; confirm whether an append (e.g. df.loc[len(df)]) was meant.
    df.loc[0] = {"mean reward": mean_reward, "std reward": std_reward}
    df = df.sort_index()
    df.to_csv(result_path)


# Despite its name, this is a pipeline entry point, not a pytest test;
# prevent pytest from trying to collect it (its `config` parameter would
# otherwise be resolved as a missing fixture).
test.__test__ = False