Coverage for uqmodels / modelization / ML_estimator / baseline.py: 74%

165 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-09 08:15 +0000

1""" 

2Implementation of usual prediction wrappers. 

3""" 

4 

5from copy import deepcopy 

6from typing import Optional, TypeVar 

7 

8import numpy as np 

9from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor 

10from sklearn.gaussian_process import GaussianProcessRegressor 

11from sklearn.gaussian_process.kernels import ( 

12 RBF, 

13 ConstantKernel, 

14 ExpSineSquared, 

15 RationalQuadratic, 

16 WhiteKernel, 

17) 

18 

19from uqmodels.modelization.UQEstimator import ( 

20 MeanVarUQEstimator, 

21 QuantileUQEstimator 

22) 

23from uqmodels.utils import add_random_state 

24 

# Documentation-only type placeholders used in signatures throughout this
# module; they carry no runtime constraints.
Array = TypeVar("Array")
UQmeasure = TypeVar("UQmeasure")
List_Estimators = TypeVar("List_Estimators")
Estimator = TypeVar("Estimator")
Kernel = TypeVar("Kernel")
Dict_params = TypeVar("Dict_params")

31 

32 

class GBRQ_UQEstimator(QuantileUQEstimator):
    """Uncertainty quantification approach based on quantile Gradient Boosting.

    Instantiation of QuantileUQEstimator using scikit-learn
    GradientBoostingRegressor models with quantile loss (one model per
    alpha level).

    Attributes/properties:
        name (str): Name of the UQ method for future reference.
        type_UQ (str): Method family among the main categories of UQEstimators.
        list_estimators (Estimator): List of quantile estimators.
        list_alpha: List of alpha confidence levels, one per quantile estimator.
        pretuned (bool): Whether to disable parameter model tuning.

    Main methods:
        fit: Fit the list of quantile estimators.
        predict: (pred, UQ) predict for each quantile estimator.
    """

    def __init__(
        self,
        list_estimators: List_Estimators = None,
        list_alpha: Optional[list] = None,
        type_UQ: str = "quantile",
        name: str = "GBRQ_UQEstimator",
        pretuned: bool = False,
        random_state: Optional[int] = None,
    ) -> None:
        """Initialise all attributes of the GBRQ_UQEstimator class.

        Args:
            list_estimators: List of provided quantile estimators. If None,
                one GradientBoostingRegressor with quantile loss is built
                per alpha level.
            list_alpha: Alpha confidence levels of the quantile estimators.
                Defaults to [0.025, 0.5, 0.975].
            type_UQ: UQ family identifier ("quantile").
            name: Estimator name for future reference.
            pretuned: If True, the `_tuning` step is skipped.
            random_state: Base seed; one derived seed per sub-estimator.
        """
        # Fix: avoid a shared mutable default argument for list_alpha.
        if list_alpha is None:
            list_alpha = [0.025, 0.5, 0.975]

        if list_estimators is None:
            list_estimators = [
                GradientBoostingRegressor(
                    # Derive a distinct seed per quantile model.
                    random_state=add_random_state(random_state, i),
                    loss="quantile",
                    alpha=alpha,
                )
                for i, alpha in enumerate(list_alpha)
            ]

        super().__init__(
            list_estimators,
            list_alpha,
            type_UQ=type_UQ,
            name=name,
            random_state=random_state,
        )
        # Fix of historical misspelling ("pretunned"): expose the corrected
        # attribute, and keep the legacy name for backward compatibility.
        self.pretuned = pretuned
        self.pretunned = pretuned

    def _format(
        self,
        X: np.array,
        y: np.array,
        type_transform: str,
        mode_UQ: bool = False,
        skip_format: bool = False,
    ):
        """Delegate (X, y) formatting to the parent class."""
        X, y = super()._format(
            X=X,
            y=y,
            type_transform=type_transform,
            mode_UQ=mode_UQ,
            skip_format=skip_format,
        )
        return (X, y)

    def fit(self, X: Array, y: Array, skip_format=False, **kwargs) -> None:
        """Fit the estimator list using the QuantileUQEstimator fit method.

        Args:
            X: Features.
            y: Target values.
            skip_format: If True, bypass the `_format` preprocessing.
        """
        super().fit(X, y, skip_format=skip_format)

    def predict(self, X: Array, skip_format=False, **kwargs):
        """Perform quantile estimation using the QuantileUQEstimator predict method.

        Args:
            X: Features.
            skip_format: If True, bypass the `_format` preprocessing.

        Return:
            pred: Median prediction, or None if the median quantile estimator
                is not in list_estimators.
            UQ: Quantile estimations.
        """
        pred, UQ = super().predict(X, skip_format=skip_format)
        return (pred, UQ)

    def _tuning(
        self,
        X: Array,
        y: Array,
        n_esti: int = 100,
        folds: int = 4,
        params: Dict_params = None,
        **kwarg,
    ) -> None:
        """Perform a random-search tuning using a parameter grid with the
        QuantileUQEstimator `_tuning` method. Skipped when `pretuned`."""
        if not self.pretuned:
            super()._tuning(X, y, n_esti, folds, params)

138 

class GPR_UQEstimator(MeanVarUQEstimator):
    """Uncertainty quantification approach based on a Gaussian Process Regressor.

    Instantiation of MeanVarUQEstimator using the scikit-learn GP model.
    Warning: GP has UQ limitations, see "The pitfalls of using Gaussian
    Process Regression for normative modeling".
    """

    def __init__(
        self,
        name: str = "Gaussian_Process_UQ",
        kernel: Kernel = None,
        gp_alpha: float = 0.000001,
        drop_ratio: float = 0.0,
        rescale: bool = False,
        random_state: Optional[int] = None,
    ) -> None:
        """Initialise the GP-based UQ estimator.

        Args:
            name: Estimator name for future reference.
            kernel: sklearn GP kernel; if None a default composite kernel
                (trend + periodic + noise terms) is built.
            gp_alpha: Value added to the kernel diagonal during fitting.
            drop_ratio: Fraction of training samples randomly dropped at fit
                time (0 disables subsampling).
            rescale: Whether the parent class rescales inputs/outputs.
            random_state: Seed forwarded to the GP optimizer restarts.
        """
        self.gp_alpha = gp_alpha
        self.kernel = kernel
        self.drop_ratio = drop_ratio

        if kernel is None:
            # Default composite kernel: smooth trend, periodic component,
            # and two white-noise terms.
            self.kernel = (
                ConstantKernel() * RationalQuadratic()
                + RBF() * ExpSineSquared()
                + RBF() * WhiteKernel(0.0001)
                + WhiteKernel(0.001)
            )
        estimator = GaussianProcessRegressor(
            self.kernel,
            alpha=self.gp_alpha,
            n_restarts_optimizer=4,
            random_state=random_state,
        )
        super().__init__(
            estimator=estimator,
            estimator_var=None,
            type_UQ="var",
            name=name,
            rescale=rescale,
            random_state=random_state,
        )

    def _format(
        self,
        X: np.array,
        y: np.array,
        type_transform: str,
        mode_UQ: bool = False,
        skip_format: bool = False,
    ):
        """Delegate (X, y) formatting to the parent class."""
        X, y = super()._format(
            X=X,
            y=y,
            type_transform=type_transform,
            mode_UQ=mode_UQ,
            skip_format=skip_format,
        )
        return (X, y)

    def fit(self, X, y, skip_format=False, **kwargs):
        """Fit procedure of the Gaussian process.

        Args:
            X: Training vectors.
            y: Target values.
            skip_format: If True, bypass the `_format` preprocessing.
        """
        X, y = self._format(X, y, "fit_transform", skip_format=skip_format)

        # GP model makes a native variance estimation.
        if self.drop_ratio == 0:
            self.estimator.fit(X, y)
        else:
            # NOTE(review): subsampling uses the global NumPy RNG, so it is
            # not reproducible from `random_state` — confirm if intended.
            mask = np.random.rand(len(y)) > self.drop_ratio
            X_mask, y_mask = X[mask], y[mask]
            self.estimator.fit(X_mask, y_mask)

    def predict(self, X: Array, skip_format=False, **kwargs):
        """Perform the prediction task of the forecasting and
        uncertainty models on X.

        Args:
            X: Samples on which to perform the prediction.
            skip_format: If True, bypass the `_format` preprocessing.

        Return:
            A tuple (pred, UQ) with the forecast and the predicted variance.
        """
        # BUG FIX: skip_format was previously passed positionally into the
        # `mode_UQ` slot of `_format`; pass it by keyword.
        X, _ = self._format(X, None, "transform", skip_format=skip_format)

        pred, std = self.estimator.predict(X, return_std=True)
        UQ = np.power(std, 2)  # variance = std**2

        _, pred = self._format(None, pred, "inverse_transform", mode_UQ=True)
        _, UQ = self._format(None, UQ, "inverse_transform", mode_UQ=True)
        return (pred, UQ)

    def _tuning(self, X: Array, y: Array, **kwarg) -> None:
        """No tuning procedure is available for the GP estimator."""
        print("No tunning procedure")

239 

240 

class REGML_UQEstimator(MeanVarUQEstimator):
    """Uncertainty quantification approach based on ML regression of bias and variance.

    Instantiation of a specific Pred-Bias-Var regression scheme for
    uncertainty quantification: a forecaster, an optional bias model fitted
    on its residuals, and one or two variance models fitted on the
    (normalised, squared) residuals.
    """

    def __init__(
        self,
        estimator=None,
        estimator_var=None,
        pretuned: bool = False,
        type_UQ: str = "var",
        use_biais: bool = True,
        name: str = "REGML_UQEstimator",
        var_min: float = 0.000001,
        rescale: bool = False,
        random_state: Optional[int] = None,
    ) -> None:
        """Initialise the Pred-Bias-Var regression scheme.

        Args:
            estimator: Forecasting model; RandomForestRegressor by default.
            estimator_var: Variance model; a deepcopy of `estimator` by default.
            pretuned: If True, skip the forecaster tuning step in `_tuning`.
            type_UQ: UQ family among "var"/"res_var" (single variance model)
                and "2var"/"res_2var" (separate bottom/top variance models).
            use_biais: Whether to fit a bias model on the residuals.
            name: Base name; the type_UQ is appended for traceability.
            var_min: Floor applied to predicted variances.
            rescale: Whether the parent class rescales inputs/outputs.
            random_state: Seed for the default RandomForestRegressor.
        """
        if estimator is None:
            estimator = RandomForestRegressor(random_state=random_state)

        if estimator_var is None:
            estimator_var = deepcopy(estimator)

        # Suffix the name with the UQ family for traceability.
        name = name + "_" + type_UQ

        super().__init__(
            estimator=estimator,
            estimator_var=estimator_var,
            name=name,
            type_UQ=type_UQ,
            rescale=rescale,
            var_min=var_min,
            random_state=random_state,
        )

        self.pretuned = pretuned
        self.use_biais = use_biais

        if self.use_biais:
            self.estimator_bias = deepcopy(estimator)

        # Residual-scale normalisation factor, learned in fit().
        self.std_norm = 1
        if self.type_UQ in ["var", "res_var"]:
            self.estimator_var = estimator_var
        elif self.type_UQ in ["2var", "res_2var"]:
            self.estimator_var_bot = estimator_var
            self.estimator_var_top = deepcopy(estimator_var)

    def _format(
        self, X, y, type_transform=False, mode_UQ=False, skip_format=False, **kwargs
    ):
        """Delegate (X, y) formatting to the parent class."""
        X, y = super()._format(
            X=X,
            y=y,
            type_transform=type_transform,
            mode_UQ=mode_UQ,
            skip_format=skip_format,
        )
        return X, y

    def get_params(self, **kwargs) -> Dict_params:
        """Return the parameter dict provided by the parent class."""
        dict_params = super().get_params()
        return dict_params

    def fit(self, X: Array, y: Array, skip_format=False, **kwargs) -> None:
        """Train the forecasting, bias and UQ models on (X, y)."""
        X, y = self._format(X, y, "fit_transform", skip_format=skip_format)

        # Train the forecaster and compute its residuals.
        self.estimator.fit(X, y)
        pred = self.estimator.predict(X)
        residual = np.squeeze(y) - np.squeeze(pred)
        residual = residual.reshape(y.shape)

        if self.use_biais:
            # Train the bias model and subtract its estimate from residuals.
            self.estimator_bias.fit(X, residual)
            bias = self.estimator_bias.predict(X)
            residual = np.squeeze(residual) - np.squeeze(bias)
            residual = residual.reshape(y.shape)

        # Normalisation for variance learning (identical with or without
        # bias correction; hoisted out of the duplicated if/else branches).
        self.std_norm = 1 / np.abs(residual).mean()

        if self.type_UQ in ["var", "res_var"]:
            # Train a single variance estimator of the residuals.
            residual = np.power(residual * self.std_norm, 2)
            self.estimator_var.fit(X, residual)
        elif self.type_UQ in ["2var", "res_2var"]:
            # Train variance estimators of positive and negative residuals.
            mask_res_bot = np.squeeze(residual <= 0)
            mask_res_top = np.squeeze(residual >= 0)
            residual = np.power(residual * self.std_norm, 2)
            if len(mask_res_bot.shape) == 1:
                # 1D targets: fit each model on its residual subset.
                self.estimator_var_bot.fit(X[mask_res_bot], residual[mask_res_bot])
                self.estimator_var_top.fit(X[mask_res_top], residual[mask_res_top])
            else:
                # Multi-dim targets: zero-out the opposite-sign residuals.
                self.estimator_var_bot.fit(X, residual * mask_res_bot)
                self.estimator_var_top.fit(X, residual * mask_res_top)
        else:
            print("type_UQ", self.type_UQ, "not covered")

    def predict(self, X: Array, skip_format=False, **kwargs):
        """Perform the prediction task of the forecasting and UQ models
        on features (X).

        Args:
            X: Samples on which to perform the prediction.
            skip_format: If True, bypass the `_format` preprocessing.

        Return:
            pred, UQ: Prediction and UQ measure.

        Raises:
            ValueError: If `type_UQ` is not a covered family.
        """
        X, _ = self._format(X, None, "transform", skip_format=skip_format)

        # Predict forecast and (optional) bias correction.
        pred = self.estimator.predict(X)
        bias = self.estimator_bias.predict(X) if self.use_biais else 0

        var_min = self.var_min
        if self.type_UQ in ["var", "res_var"]:
            # Predict std of residuals, floored at var_min.
            var = self.estimator_var.predict(X)
            var[var < var_min] = var_min
            UQ = np.sqrt(var) / np.power(self.std_norm, 2)
        elif self.type_UQ in ["2var", "res_2var"]:
            # Predict std of positive and negative residuals.
            var_bot = self.estimator_var_bot.predict(X)
            var_bot[var_bot < var_min] = var_min
            var_bot = var_bot / np.power(self.std_norm, 2)

            var_top = self.estimator_var_top.predict(X)
            var_top[var_top < var_min] = var_min
            var_top = var_top / np.power(self.std_norm, 2)
            UQ = np.concatenate(
                [np.expand_dims(i, 0) for i in [var_bot, var_top]], axis=0
            )
        else:
            # BUG FIX: previously fell through with UQ undefined, raising
            # an opaque NameError below; fail explicitly instead.
            raise ValueError(f"type_UQ {self.type_UQ} not covered")

        _, pred = self._format(None, pred + bias, "inverse_transform")
        _, UQ = self._format(None, UQ, "inverse_transform", mode_UQ=True)
        return (pred, UQ)

    def _tuning(
        self,
        X: Array,
        y: Array,
        n_esti: int = 100,
        folds: int = 4,
        params: Dict_params = None,
        **kwarg,
    ) -> None:
        """Perform random-search tuning of the forecaster, bias and variance
        models using the given parameter grid (skipped when `params` is None)."""
        score = "neg_mean_squared_error"

        X, y = self._format(X, y, "fit_transform")

        # If there is no parameter grid: skip the tuning step entirely.
        if params is not None:
            # If the forecast model is pretuned, skip its tuning step.
            if not self.pretuned:
                self.estimator = super()._tuning(
                    self.estimator, X, y, n_esti, folds, score, params
                )

            self.estimator.fit(X, y)
            pred = self.estimator.predict(X)

            # Build residuals and tune the bias model by random search.
            residual = np.squeeze(y) - np.squeeze(pred)
            residual = residual.reshape(y.shape)
            if self.use_biais:
                self.estimator_bias = super()._tuning(
                    self.estimator_bias,
                    X,
                    residual,
                    int(n_esti / 2),
                    folds,
                    score,
                    params,
                )
                self.estimator_bias.fit(X, residual)
                bias = self.estimator_bias.predict(X)

                # Correct residuals before tuning the variance model(s).
                residual = np.squeeze(residual) - np.squeeze(bias)
                residual = residual.reshape(y.shape)

            if self.type_UQ == "var":
                # Gaussian hypothesis: one variance model.
                residual = np.power(residual / residual.std(), 2)
                self.estimator_var = super()._tuning(
                    self.estimator_var,
                    X,
                    residual,
                    int(n_esti / 2),
                    folds,
                    score,
                    params,
                )
                self.estimator_var.fit(X, residual)

            elif self.type_UQ == "2var":
                # Two-sided Gaussian hypothesis: separate variance models
                # for positive (top) and negative (bot) residuals.
                flag_res_top = np.squeeze(residual >= 0)
                flag_res_bot = np.squeeze(residual <= 0)
                residual = np.power(residual / residual.std(), 2)
                self.estimator_var_bot = super()._tuning(
                    self.estimator_var_bot,
                    X[flag_res_bot],
                    residual[flag_res_bot],
                    int(n_esti / 2),
                    folds,
                    score,
                    params,
                )
                self.estimator_var_bot.fit(X[flag_res_bot], residual[flag_res_bot])

                self.estimator_var_top = super()._tuning(
                    self.estimator_var_top,
                    X[flag_res_top],
                    residual[flag_res_top],
                    int(n_esti / 2),
                    folds,
                    score,
                    params,
                )
                self.estimator_var_top.fit(X[flag_res_top], residual[flag_res_top])