# uqmodels/evaluation/base_metrics.py
1"""
2Metrics module for UQ method evaluation.
3"""
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
# @TODO Add meta class for automatic metric evaluation on the benchmark

# Base intermediate metrics
def mae(y_true, y_pred):
    return mean_absolute_error(y_true, y_pred)

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

def q_loss(y, pred, per):
    """Pinball (quantile) loss of predictions `pred` at quantile level `per`."""
    x = y - pred
    return ((per - 1.0) * x * (x < 0) + per * x * (x >= 0)).mean()

def quantile_loss(y, y_pred_lower, y_pred_upper, alpha):
    """Sum of the pinball losses at both bounds of a target-coverage-`alpha` interval."""
    q_loss_bot = q_loss(y, y_pred_lower, (1 - alpha) / 2)
    q_loss_top = q_loss(y, y_pred_upper, 1 - ((1 - alpha) / 2))
    return q_loss_bot + q_loss_top

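# Hedged usage sketch (illustrative, not part of the original module): the
# helper name `_demo_quantile_loss` and the synthetic data are assumptions.
# For standard-normal data, bounds at ∓1.645 are roughly the 5%/95% quantiles,
# so the pinball losses at alpha=0.90 should come out small.
def _demo_quantile_loss():
    rng = np.random.default_rng(0)
    y = rng.normal(size=10_000)
    lower = np.full_like(y, -1.645)
    upper = np.full_like(y, 1.645)
    return quantile_loss(y, lower, upper, alpha=0.90)
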
def NLL_loss(y, pred, sigma):
    # Per-point Gaussian negative log-likelihood.
    return np.log(sigma) + 0.5 * np.log(2 * np.pi) + (y - pred) ** 2 / (2 * (sigma**2))

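# Hedged sanity check (illustrative; assumes scipy is available): the per-point
# NLL above should match -scipy.stats.norm.logpdf up to floating-point error.
def _check_nll_against_scipy():
    from scipy.stats import norm

    rng = np.random.default_rng(1)
    y, pred, sigma = rng.normal(size=100), np.zeros(100), np.full(100, 2.0)
    assert np.allclose(NLL_loss(y, pred, sigma), -norm.logpdf(y, loc=pred, scale=sigma))
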
def perf_pred(y_pred, y):
    return mean_absolute_error(y, y_pred), rmse(y, y_pred)

def average_coverage(y_true, y_pred_lower, y_pred_upper):
    return ((y_true >= y_pred_lower) & (y_true <= y_pred_upper)).mean()

def ace(y_true, y_pred_lower, y_pred_upper, alpha):
    """Average coverage error: empirical coverage minus the target coverage `alpha`."""
    cov = average_coverage(y_true, y_pred_lower, y_pred_upper)
    return cov - alpha

def sharpness(y_pred_lower, y_pred_upper):
    # Mean absolute interval width.
    return (np.abs(y_pred_upper - y_pred_lower)).mean()

def sharpness2(y_pred_lower, y_pred_upper):
    # Root-mean-square interval width (penalises occasional very wide intervals).
    return np.sqrt(np.power(y_pred_upper - y_pred_lower, 2).mean())

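# Hedged usage sketch (toy numbers are illustrative assumptions): how
# average_coverage, ace and sharpness relate on constant-width intervals.
def _demo_interval_metrics():
    y = np.array([0.0, 1.0, 2.0, 3.0])
    lower, upper = y - 1.0, y + 1.0
    cov = average_coverage(y, lower, upper)  # 1.0: every point is covered
    err = ace(y, lower, upper, alpha=0.90)   # 1.0 - 0.90 = 0.10 over-coverage
    width = sharpness(lower, upper)          # mean interval width: 2.0
    return cov, err, width
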
def interval_score(y_true, y_pred_lower, y_pred_upper, alpha):
    # Higher-is-better rescaling of the interval (Winkler) score: equal to
    # -2 * alpha times the standard score when `alpha` is the miscoverage rate.
    return (
        -2 * alpha * (y_pred_upper - y_pred_lower)
        - 4 * (y_pred_lower - y_true) * (y_true < y_pred_lower)
        - 4 * (y_true - y_pred_upper) * (y_pred_upper < y_true)
    ).mean()

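# Hedged illustration (assumes the higher-is-better reading noted above):
# widening an interval that already covers every point only lowers the score.
def _demo_interval_score():
    y = np.zeros(5)
    narrow = interval_score(y, np.full(5, -1.0), np.full(5, 1.0), alpha=0.10)
    wide = interval_score(y, np.full(5, -2.0), np.full(5, 2.0), alpha=0.10)
    return narrow, wide  # narrow (-0.4) > wide (-0.8)
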
def print_real_metrics_meta(res):
    """Print mean ± std of the per-split metrics returned by `real_metrics` over runs."""
    res_mean = np.array(res).mean(axis=0)
    res_std = np.array(res).std(axis=0)
    if res_mean.shape[0] == 3:
        name_list = ["TRAIN", "CAL", "TEST"]
    else:
        name_list = ["TRAIN", "TEST"]
    for n, name in enumerate(name_list):
        str_1 = "pred_mae = {mae:.3f}±{std_mae:.3f}, pred_rmse = {rmse:.3f}±{std_rmse:.3f}"
        print(
            name,
            " ",
            str_1.format(
                mae=res_mean[n][0],
                rmse=res_mean[n][1],
                std_mae=res_std[n][0],
                std_rmse=res_std[n][1],
            ),
        )
        str_2 = "Diff_cov = {dif_ace:.1f}%±{std_dif_ace:.1f}, Q_loss = {qloss:.3f}±{std_qloss:.3f}"
        print(
            str_2.format(
                dif_ace=res_mean[n][2],
                std_dif_ace=res_std[n][2],
                qloss=res_mean[n][5],
                std_qloss=res_std[n][5],
            )
        )
        str_3 = "Sharpness = {sharp:.3f}±{std_sharp:.3f}, Sharpness² = {sharp2:.3f}±{std_sharp2:.3f}"
        print(
            str_3.format(
                sharp=res_mean[n][3],
                std_sharp=res_std[n][3],
                sharp2=res_mean[n][4],
                std_sharp2=res_std[n][4],
            )
        )
    return (np.round(res_mean, 3), np.round(res_std, 3))

def real_metrics(
    y, pred_, bot, top, train, test, alpha=0.90, train_fit=None, verbose=0
):
    """Compute (mae, rmse, |ACE|%, sharpness, sharpness2, quantile loss) per split."""
    res = []
    flag_list = [train, test]
    if train_fit is not None:
        if train_fit.sum() != train.sum():
            # Split the training mask into a fit part and a calibration part.
            train_cal = np.copy(train)
            train_cal[train_fit] = False
            flag_list = [train_fit, train_cal, test]
    for n, flag in enumerate(flag_list):
        a = [-1, -1]
        if pred_ is not None:
            a = perf_pred(pred_[flag], y[flag])
        b = 100 * np.abs(ace(y[flag], bot[flag], top[flag], alpha))
        c = sharpness(bot[flag], top[flag])
        d = sharpness2(bot[flag], top[flag])
        e = quantile_loss(y[flag], bot[flag], top[flag], alpha)
        res.append(list((a[0], a[1], b, c, d, e)))
    return res
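
# Hedged end-to-end sketch (synthetic data; the mask construction is an assumed
# usage pattern): build boolean train/test masks, fabricate ~90% Gaussian
# intervals, then summarise the runs with print_real_metrics_meta.
def _demo_real_metrics():
    rng = np.random.default_rng(2)
    n = 1000
    y = rng.normal(size=n)
    pred = np.zeros(n)
    bot, top = pred - 1.645, pred + 1.645  # ~90% intervals for N(0, 1)
    train = np.zeros(n, dtype=bool)
    train[:700] = True
    test = ~train
    res = real_metrics(y, pred, bot, top, train, test, alpha=0.90)
    return print_real_metrics_meta([res])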