Coverage for uqmodels/evaluation/base_metrics.py: 25%

60 statements  


1""" 

2Metrics module for UQ method evaluation. 

3""" 

4 

5import numpy as np 

6from sklearn.metrics import mean_absolute_error, mean_squared_error 

7 

# @TODO Add meta class for automatic metric evaluation on the benchmark

# Base intermediate metrics

def mae(y_true, y_pred):
    return mean_absolute_error(y_true, y_pred)

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

def q_loss(y, pred, per):
    # Pinball (quantile) loss at quantile level `per`
    x = y - pred
    return ((per - 1.0) * x * (x < 0) + per * x * (x >= 0)).mean()
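# Worked example (illustrative): for y = [0, 0] and pred = [1, -1] the
# residuals are x = [-1, 1]; at per = 0.9 the elementwise losses are 0.1 and
# 0.9, so q_loss returns 0.5. Under-predictions (y above pred) are penalised
# nine times more than over-predictions at the 0.9 quantile level.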

def quantile_loss(y, y_pred_lower, y_pred_upper, alpha):
    q_loss_bot = q_loss(y, y_pred_lower, (1 - alpha) / 2)
    q_loss_top = q_loss(y, y_pred_upper, 1 - ((1 - alpha) / 2))
    return q_loss_bot + q_loss_top
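# Here alpha is the target coverage level: with alpha = 0.90 the lower bound
# is scored at the 0.05 quantile and the upper bound at the 0.95 quantile,
# so quantile_loss is the summed pinball loss of a central 90% interval.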

def NLL_loss(y, pred, sigma):
    # Per-point Gaussian negative log-likelihood
    return np.log(sigma) + 0.5 * np.log(2 * np.pi) + (y - pred) ** 2 / (2 * sigma**2)
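# Sanity check (illustrative): with y == pred and sigma == 1 the per-point
# NLL is 0.5 * log(2 * pi) ≈ 0.919, and it grows quadratically with the
# standardised residual (y - pred) / sigma.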

def perf_pred(y_pred, y):
    return mean_absolute_error(y, y_pred), rmse(y, y_pred)

def average_coverage(y_true, y_pred_lower, y_pred_upper):
    return ((y_true >= y_pred_lower) & (y_true <= y_pred_upper)).mean()

def ace(y_true, y_pred_lower, y_pred_upper, alpha):
    # Average coverage error, with alpha the target coverage level
    # (matching the convention of quantile_loss and real_metrics)
    cov = average_coverage(y_true, y_pred_lower, y_pred_upper)
    return cov - alpha
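# Illustrative check: for y ~ N(0, 1) and constant bounds at ±1.645 (the
# central 90% interval of a standard normal), average_coverage is close to
# 0.90 on a large sample and ace(..., alpha=0.90) is close to 0.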

def sharpness(y_pred_lower, y_pred_upper):
    # Mean interval width
    return (np.abs(y_pred_upper - y_pred_lower)).mean()

def sharpness2(y_pred_lower, y_pred_upper):
    # Root-mean-square interval width: the mean is taken before the square
    # root (an elementwise sqrt of the squares would reduce to sharpness)
    return np.sqrt(np.power(y_pred_upper - y_pred_lower, 2).mean())

def interval_score(y_true, y_pred_lower, y_pred_upper, alpha):
    return (
        -2 * alpha * (y_pred_upper - y_pred_lower)
        - 4 * (y_pred_lower - y_true) * (y_true < y_pred_lower)
        - 4 * (y_true - y_pred_upper) * (y_pred_upper < y_true)
    ).mean()
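# Note: this is a negatively oriented score (higher is better). If alpha is
# read as the miscoverage level, it equals -2 * alpha times the classical
# Gneiting-Raftery interval score
#   (u - l) + (2 / alpha) * (l - y) * 1{y < l} + (2 / alpha) * (y - u) * 1{y > u}
# averaged over samples.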

def print_real_metrics_meta(res):
    res_mean = np.array(res).mean(axis=0)
    res_std = np.array(res).std(axis=0)
    if res_mean.shape[0] == 3:
        name_list = ["TRAIN", "CAL", "TEST"]
    else:
        name_list = ["TRAIN", "TEST"]
    for n, name in enumerate(name_list):
        str_1 = "pred_mae = {mae:.3f}±{std_mae:.3f}, pred_rmse = {rmse:.3f}±{std_rmse:.3f}"
        print(
            name,
            " ",
            str_1.format(
                mae=res_mean[n][0],
                rmse=res_mean[n][1],
                std_mae=res_std[n][0],
                std_rmse=res_std[n][1],
            ),
        )
        str_2 = "Diff_cov = {dif_ace:.1f}%±{std_dif_ace:.1f}, Q_loss = {qloss:.3f}±{std_qloss:.3f}"
        print(
            str_2.format(
                dif_ace=res_mean[n][2],
                std_dif_ace=res_std[n][2],
                qloss=res_mean[n][5],
                std_qloss=res_std[n][5],
            )
        )
        str_3 = "Sharpness = {sharp:.3f}±{std_sharp:.3f}, Sharpness² = {sharp2:.3f}±{std_sharp2:.3f}"
        print(
            str_3.format(
                sharp=res_mean[n][3],
                std_sharp=res_std[n][3],
                sharp2=res_mean[n][4],
                std_sharp2=res_std[n][4],
            )
        )
    return (np.round(res_mean, 3), np.round(res_std, 3))
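# Hypothetical usage sketch: `res` is a list of real_metrics() outputs, one
# per run, i.e. of shape (n_runs, n_splits, 6) with columns
# (mae, rmse, |ace| in %, sharpness, sharpness2, quantile_loss):
#   res = [real_metrics(y, p, b, t, train, test) for p, b, t in runs]
#   means, stds = print_real_metrics_meta(res)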

def real_metrics(
    y, pred_, bot, top, train, test, alpha=0.90, train_fit=None, verbose=0
):
    res = []

    flag_list = [train, test]
    if train_fit is not None:
        if train_fit.sum() != train.sum():
            # Split the training mask into a fit part and a calibration part
            train_cal = np.copy(train)
            train_cal[train_fit] = False
            flag_list = [train_fit, train_cal, test]

    for flag in flag_list:
        a = [-1, -1]
        if pred_ is not None:
            a = perf_pred(pred_[flag], y[flag])
        b = 100 * np.abs(ace(y[flag], bot[flag], top[flag], alpha))
        c = sharpness(bot[flag], top[flag])
        d = sharpness2(bot[flag], top[flag])
        e = quantile_loss(y[flag], bot[flag], top[flag], alpha)
        res.append([a[0], a[1], b, c, d, e])
    return res
126 return res