Coverage for uqmodels/modelization/DL_estimator/neural_network_UQ.py: 77%

425 statements  

coverage.py v7.13.0, created at 2025-12-09 08:15 +0000

import copy
import inspect
import os

import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold

import uqmodels.modelization.DL_estimator.loss as uqloss
import uqmodels.processing as uqproc
from uqmodels.modelization.DL_estimator.utils import set_global_determinism
from uqmodels.modelization.UQEstimator import UQEstimator
from uqmodels.modelization.DL_estimator.data_generator import default_Generator
from uqmodels.utils import add_random_state, apply_mask, cut

def Identity_factory(X, y, **kwargs):
    return (X, y, None)

class NN_UQ(UQEstimator):
    """Neural network UQ estimator (MC Dropout, Deep Ensemble, EDL, PNN)."""

    def __init__(
        self,
        model_initializer,
        model_parameters,
        factory_parameters=dict(),
        training_parameters=dict(),
        type_output=None,
        rescale=False,
        n_ech=5,
        train_ratio=0.9,
        var_min=0.000001,
        name="NN",
        random_state=None,
    ):
        self.model_initializer = model_initializer

        if random_state is not None:
            params_list = list(inspect.signature(model_initializer).parameters)

            if "seed" in params_list:
                model_parameters["seed"] = random_state

            elif "random_state" in params_list:
                model_parameters["random_state"] = random_state

            else:
                print(
                    'Warning: model_initializer has no "seed" or "random_state" parameter'
                )

        self.model_parameters = model_parameters
        self.factory_parameters = factory_parameters
        self.training_parameters = training_parameters
        self.type_output = type_output
        self.initialized = False
        self.history = []
        self.n_ech = n_ech
        self.train_ratio = train_ratio

        type_UQ = "var_A&E"
        super().__init__(
            name=name,
            type_UQ=type_UQ,
            rescale=rescale,
            var_min=var_min,
            random_state=random_state,
        )

        if "generator" not in self.training_parameters.keys():
            self.training_parameters["generator"] = False

        if "test_batch_size" not in self.training_parameters.keys():
            self.training_parameters["test_batch_size"] = 20000

        # Additional deep ensemble parameters
        self.ddof = 1
        if "ddof" in model_parameters.keys():
            self.ddof = model_parameters["ddof"]

        if "train_ratio" in model_parameters.keys():
            self.train_ratio = model_parameters["train_ratio"]

        if "n_ech" in model_parameters.keys():
            self.n_ech = model_parameters["n_ech"]

        self.snapshot = False
        if "snapshot" in model_parameters.keys():
            self.snapshot = model_parameters["snapshot"]

        self.data_drop = 0
        if "data_drop" in model_parameters.keys():
            self.data_drop = model_parameters["data_drop"]

        self.k_fold = None
        if "k_fold" in model_parameters.keys():
            self.k_fold = model_parameters["k_fold"]

    def build_loss(self, loss, param_loss=None):
        """Build a loss from a string identifier or a callable plus its parameters.

        Args:
            loss (str or callable): "BNN", "EDL", "MSE", or a loss builder.
            param_loss (dict or value, optional): parameters passed to the loss
                builder, as kwargs if a dict, positionally otherwise. Defaults to None.

        Returns:
            callable: the built loss function.
        """
        if loss == "BNN":
            loss = uqloss.build_BNN_loss
            if param_loss is None:
                param_loss = {}
        elif loss == "EDL":
            loss = uqloss.build_EDL_loss
            if param_loss is None:
                param_loss = {}
        elif loss == "MSE":
            loss = uqloss.build_MSE_loss
            if param_loss is None:
                param_loss = {}

        if param_loss is not None:
            if isinstance(param_loss, dict):
                loss = loss(**param_loss)
            else:
                loss = loss(param_loss)

        return loss
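
    # Example (illustrative): the three accepted forms, assuming an NN_UQ
    # instance `est`; the dict keys below are hypothetical.
    #   est.build_loss("MSE", 2)                # string id + positional parameter
    #   est.build_loss("BNN", {"alpha": 0.95})  # string id + parameter dict
    #   est.build_loss(custom_builder, None)    # callable returned unchanged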

    def build_metrics(self, metrics):
        """Build a list of metrics from string identifiers or metric callables.

        Args:
            metrics (list): metric identifiers ("MSE", "BNN") or callables,
                the latter being kept as-is.
        """
        list_metrics = []
        for metric in metrics:
            if metric == "MSE":
                output_size = 1
                if self.type_output in ["MC_Dropout", "Deep_ensemble"]:
                    output_size = 2
                elif self.type_output in ["EDL"]:
                    output_size = 4
                metric = uqloss.build_MSE_loss(output_size, metric=True)
            elif metric == "BNN":
                metric = uqloss.build_BNN_loss(
                    0.95, metric=True, type_output=self.type_output
                )
            list_metrics.append(metric)
        return list_metrics

    def _format(self, X, y, type_transform, mode_UQ=False):
        X, y = super()._format(X, y, type_transform=type_transform, mode_UQ=mode_UQ)
        return (X, y)

    def factory(self, X, y, mask=None, cut_param=None, only_fit_scaler=False):
        if y is not None:
            self.y_shape = y.shape
            if cut_param is not None:
                print("cutting target")
                min_cut, max_cut = cut_param
                y = cut(y, min_cut, max_cut)

        if self.rescale:
            X, y = self._format(X, y, type_transform="fit_transform")

        if only_fit_scaler:
            return ()

        return (X, y, mask)

    def save(self, path=None, name=None):
        if name is None:
            name = self.name

        if self.type_output == "Deep_ensemble":
            for n, model in enumerate(self.model):
                cur_name = name + "_" + str(n)
                new_path = os.path.join(path, cur_name + ".weights.h5")
                os.makedirs(os.path.dirname(new_path), exist_ok=True)
                model.save_weights(new_path)
        else:
            new_path = os.path.join(path, name + ".weights.h5")
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            self.model.save_weights(new_path)

        model_tmp = self.model
        self.model = True
        dict_parameters = self.__dict__
        uqproc.write(path, [name + "_params"], dict_parameters)
        self.model = model_tmp

    def load(self, path, name=None):
        if name is None:
            name = self.name

        dict_parameters = uqproc.read(path, [name + "_params"])
        for attribute, value in dict_parameters.items():
            self.__setattr__(attribute, value)

        self.init_neural_network()
        if self.type_output == "Deep_ensemble":
            for n, model in enumerate(self.model):
                new_path = os.path.join(path, name + "_" + str(n) + ".weights.h5")
                model.load_weights(new_path)
        else:
            new_path = os.path.join(path, name + ".weights.h5")
            self.model.load_weights(new_path)
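
    # Example (illustrative): save/load round trip, assuming a fitted NN_UQ
    # `est` and a writable directory "runs/exp1" (hypothetical path).
    #   est.save(path="runs/exp1", name="my_nn")
    #   est2 = NN_UQ(model_initializer, model_parameters)
    #   est2.load("runs/exp1", name="my_nn")  # restores params, rebuilds, loads weights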

    def compile(self, step=0, optimizer=None, loss=None, metrics=None, **kwarg):
        if optimizer is None:
            l_r = self.training_parameters["l_r"][step]
            optimizer = tf.keras.optimizers.Nadam(learning_rate=l_r)
        kwarg["optimizer"] = optimizer

        if loss is None:
            loss_ = self.training_parameters["list_loss"][step]
            param_loss_current = self.training_parameters["param_loss"][step]
            loss = self.build_loss(loss_, param_loss_current)
        kwarg["loss"] = loss

        if metrics is None:
            metrics = self.training_parameters["metrics"]
        kwarg["metrics"] = metrics
        self.model.compile(**kwarg)

    def modify_dropout(self, dp):
        tmp_path = self.name + ".weights.h5"
        self.model.save_weights(tmp_path)
        self.model_parameters["dp"] = dp
        self.model = self.model_initializer(**self.model_parameters)
        self.initialized = True
        self.model.load_weights(tmp_path)

    def reset(self):
        del self.model
        self.initialized = False

    def init_neural_network(self):
        "Apply model_initializer with model_parameters and store the result in self.model."
        if self.random_state is not None:
            set_global_determinism(seed=self.random_state)

        if self.type_output == "Deep_ensemble":
            self.model = []
            for i in range(self.n_ech):
                self.model.append(self.model_initializer(**self.model_parameters))
        else:
            self.model = self.model_initializer(**self.model_parameters)
        self.initialized = True

    def fit(
        self,
        Inputs,
        Targets,
        train=None,
        test=None,
        training_parameters=None,
        verbose=None,
        **kwargs
    ):
        print("start_fit")

        if training_parameters is None:
            training_parameters = copy.deepcopy(self.training_parameters)
        if verbose is not None:
            training_parameters["verbose"] = verbose

        if not (self.initialized) or not hasattr(self, "model"):
            self.init_neural_network()

        if train is None:
            last_val = False
            if hasattr(self, "model_parameters") and (
                "size_window" in self.model_parameters.keys()
            ):
                last_val = True

            train, test = generate_train_test(
                len_=len(Targets),
                train_ratio=self.train_ratio,
                last_val=last_val,
                random_state=self.random_state,
            )

        if test is None:
            test = np.invert(train)

        history = self.basic_fit(
            Inputs, Targets, train, test, **training_parameters
        )

        for i in history:
            self.history.append(i)

    # Basic fit function

    def basic_fit(
        self,
        Inputs,
        Targets,
        train=None,
        test=None,
        epochs=[1000, 1000],
        b_s=[100, 20],
        l_r=[0.01, 0.005],
        sample_w=None,
        verbose=1,
        list_loss=["mse"],
        metrics=None,
        generator=None,
        steps_per_epoch=None,
        shuffle=True,
        callbacks="default",
        validation_freq=1,
        param_loss=None,
        test_batch_size=None,
        **kwargs
    ):
        # Training function
        list_history = []

        if self.random_state is not None:
            set_global_determinism(seed=self.random_state)

        if generator is None:
            generator = self.training_parameters["generator"]

        if test_batch_size is None:
            test_batch_size = self.training_parameters["test_batch_size"]

        if train is None:
            last_val = False
            if hasattr(self, "model_parameters") and (
                "size_window" in self.model_parameters.keys()
            ):
                last_val = True

            train, test = generate_train_test(
                len_=len(Targets),
                train_ratio=self.train_ratio,
                last_val=last_val,
                random_state=self.random_state,
            )

        if test is None:
            test = np.invert(train)

        if not (hasattr(self, "scaler")):
            _ = self.factory(Inputs, Targets, only_fit_scaler=True)

        n_model = 1
        if self.type_output == "Deep_ensemble":
            n_model = self.n_ech
            list_sampletoremove = generate_K_fold_removing_index(
                n_model,
                k_fold=self.k_fold,
                train=train,
                data_drop=self.data_drop,
                random_state=self.random_state,
            )

        for n_submodel in range(n_model):
            train_ = np.copy(train)
            test_ = np.copy(test)

            # Deep ensemble: per-submodel dataset differentiation if k-fold is activated
            if self.type_output == "Deep_ensemble":
                train_[list_sampletoremove[n_submodel]] = False
                test_[list_sampletoremove[n_submodel]] = True

            for n, loss in enumerate(list_loss):
                for i, (batch_size, learning_rate) in enumerate(zip(b_s, l_r)):

                    loss_ = self.build_loss(
                        loss, None if param_loss is None else param_loss[n]
                    )
                    if metrics is not None:
                        metrics = self.build_metrics(metrics)

                    if self.type_output == "Deep_ensemble":
                        if (self.snapshot) & (n_submodel > 0):
                            self.model[n_submodel] = tf.keras.models.clone_model(
                                self.model[0]
                            )

                        current_model = self.model[n_submodel]

                    else:
                        current_model = self.model

                    current_model.compile(
                        optimizer=tf.keras.optimizers.Nadam(
                            learning_rate=learning_rate
                        ),
                        loss=loss_,
                        metrics=metrics,
                    )

                    (
                        In_,
                        Tar_,
                        validation_data_,
                        validation_steps,
                        steps_per_epoch,
                        batch_size,
                    ) = self.dataset_generator(
                        Inputs=apply_mask(Inputs, train_),
                        Targets=apply_mask(Targets, train_),
                        validation_data=(
                            apply_mask(Inputs, test_),
                            apply_mask(Targets, test_),
                        ),
                        batch_size=batch_size,
                        generator=generator,
                        shuffle=shuffle,
                        test_batch_size=test_batch_size,
                    )

                    if callbacks == "default":
                        callbacks = uqloss.default_callbacks()

                    history = current_model.fit(
                        x=In_,
                        y=Tar_,
                        validation_data=validation_data_,
                        epochs=epochs[i],
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps,
                        batch_size=batch_size,
                        sample_weight=sample_w,
                        shuffle=shuffle,
                        callbacks=callbacks,
                        validation_freq=validation_freq,
                        verbose=verbose,
                    )

                    current_model.compile()

                    list_history.append(history)

        return list_history
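
    # Example (illustrative): a direct call with an explicit two-stage schedule,
    # assuming an initialized NN_UQ `est` and numpy arrays X, y; list_loss
    # entries pair with param_loss entries, and each loss is trained once per
    # (b_s, l_r) pair.
    #   est.basic_fit(X, y, epochs=[50, 50], b_s=[64, 32], l_r=[1e-2, 1e-3],
    #                 list_loss=["MSE"], metrics=["MSE"], param_loss=[1])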

    def dataset_generator(
        self,
        Inputs,
        Targets,
        validation_data=None,
        batch_size=32,
        shuffle=False,
        generator=True,
        test_batch_size=None,
    ):
        """Handle the cases with and without a data generator.

        Args:
            Inputs: model inputs.
            Targets: model targets.
            validation_data (tuple, optional): (inputs, targets) for validation.
            batch_size (int): training batch size.
            shuffle (bool): whether to shuffle training data.
            generator (bool): whether to wrap the data in generators.
            test_batch_size (int, optional): validation batch size.

        Returns:
            tuple: (In_, Tar_, validation_data_, validation_steps,
                steps_per_epoch, batch_size)
        """
        if generator:
            In_ = self.Build_generator(
                Inputs, Targets, batch=batch_size, shuffle=shuffle, train=True
            )
            Tar_ = None

            if test_batch_size is None:
                test_batch_size = self.training_parameters["test_batch_size"]

            validation_data_ = self.Build_generator(
                validation_data[0],
                validation_data[1],
                batch=test_batch_size,
                shuffle=False,
                train=True,
            )

            steps_per_epoch = len(In_)
            validation_steps = len(validation_data_)
            batch_size = None
        else:
            In_ = Inputs
            Tar_ = Targets
            validation_data_ = validation_data
            validation_steps = None
            steps_per_epoch = None
        return (
            In_,
            Tar_,
            validation_data_,
            validation_steps,
            steps_per_epoch,
            batch_size,
        )
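
    # Example (illustrative): in-memory mode passes data through and lets Keras
    # batch it, while generator mode returns wrapped generators, step counts,
    # and batch_size=None.
    #   In_, Tar_, val_, val_steps, steps, bs = est.dataset_generator(
    #       X_tr, y_tr, validation_data=(X_val, y_val),
    #       batch_size=64, generator=False)
    #   # -> In_ is X_tr, bs == 64, steps is None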

    def Build_generator(self, X, y, batch=32, shuffle=True, train=True):
        return default_Generator(
            X, y, metamodel=self, batch=batch, shuffle=shuffle, train=train
        )

    def predict(self, X, type_output=None, generator=None, **kwargs):
        if type_output is None:
            type_output = self.type_output

        pred, UQ = self.basic_predict(
            X, n_ech=self.n_ech, type_output=type_output, generator=generator, **kwargs
        )

        if self.rescale:
            _, pred = self._format(None, pred, type_transform="inverse_transform")
            _, UQ = self._format(
                None, UQ, type_transform="inverse_transform", mode_UQ=True
            )

        return (pred, UQ)
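
    # Output layout (illustrative note): `pred` matches the target shape, and
    # `UQ` stacks the aleatoric then epistemic variance along a leading axis,
    # in line with type_UQ="var_A&E":
    #   pred, UQ = est.predict(X)
    #   var_A, var_E = UQ[0], UQ[1]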

    def basic_predict(
        self,
        Inputs,
        n_ech=6,
        type_output="MC_Dropout",
        generator=None,
        test_batch_size=None,
        **kwarg
    ):
        # Variational prediction + variance estimation for steps T+1 and T+4 (lag)
        if generator is None:
            generator = self.training_parameters["generator"]

        if self.random_state is not None:
            set_global_determinism(seed=self.random_state)

        if generator:
            if test_batch_size is None:
                test_batch_size = self.training_parameters["test_batch_size"]
            Inputs = self.Build_generator(
                Inputs, None, batch=test_batch_size, shuffle=False, train=False
            )

        if type_output in ["MC_Dropout", "MC_Dropout_no_PNN"]:
            pred, UQ = Drawn_based_prediction(
                Inputs,
                self.model,
                n_ech,
                ddof=self.ddof,
                generator=generator,
                type_output=type_output,
            )

        elif type_output == "Deep_ensemble":
            pred, UQ = Ensemble_based_prediction(
                Inputs,
                self.model,
                ddof=self.ddof,
                generator=generator,
                type_output=type_output,
            )

        elif type_output in ["EDL", "PNN", "None", None]:
            pred, UQ = Deterministic_prediction(
                Inputs,
                self.model,
                ddof=self.ddof,
                generator=generator,
                type_output=type_output,
            )

        else:
            raise Exception(
                "Unknown type_output: choose 'MC_Dropout', 'MC_Dropout_no_PNN', "
                "'Deep_ensemble', 'EDL', 'PNN', 'None' or None"
            )

        return (pred, UQ)

def Drawn_based_prediction(
    Inputs, model, n_ech, ddof, generator=False, type_output="MC_Dropout"
):
    """Prediction (mu, sigma) of Inputs using a draw-based UQ paradigm (e.g., MC Dropout).

    Args:
        Inputs: model inputs (or a generator over them).
        model (tf.keras.Model): neural network.
        n_ech (int): number of stochastic forward passes (dropout draws).
        ddof (int): delta degrees of freedom for the variance estimate.
        generator (bool, optional): whether Inputs is a generator. Defaults to False.
        type_output (str): "MC_Dropout" or "MC_Dropout_no_PNN".

    Returns:
        tuple: (pred, UQ) with UQ = [var_A, var_E] stacked on a leading axis.
    """

    if generator:
        pred = []
        var_a = []
        var_e = []
        for Inputs_gen, _ in Inputs:  # for each batch, do n_ech inferences and aggregate
            output = []
            for i in range(n_ech):
                output.append(model.predict(Inputs_gen))

            if type_output == "MC_Dropout_no_PNN":
                pred_ = np.array(output)
                var_a.append(0 * pred_.mean(axis=0))

            if type_output == "MC_Dropout":
                pred_, logvar = np.split(np.array(output), 2, -1)
                var_a.append(np.exp(logvar).mean(axis=0))

            pred.append(pred_.mean(axis=0))
            var_e.append(pred_.var(axis=0, ddof=ddof))

        pred = np.concatenate(pred, axis=0)
        var_a = np.concatenate(var_a, axis=0)
        var_e = np.concatenate(var_e, axis=0)
        UQ = np.concatenate([var_a[None, :], var_e[None, :]], axis=0)
    else:
        output = []
        for i in range(n_ech):
            output.append(model.predict(Inputs))

        if type_output == "MC_Dropout_no_PNN":
            pred_ = np.array(output)
            var_a = 0 * pred_.mean(axis=0)

        if type_output == "MC_Dropout":
            pred_, logvar = np.split(np.array(output), 2, -1)
            var_a = np.exp(logvar).mean(axis=0)
        var_e = np.var(pred_, axis=0, ddof=ddof)
        pred = pred_.mean(axis=0)
        UQ = np.concatenate([var_a[None, :], var_e[None, :]], axis=0)
    return (pred, UQ)
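

# Illustrative sketch (not part of the original module): the moment matching
# performed by Drawn_based_prediction, on plain numpy draws; all names here
# are hypothetical.
def _demo_mc_dropout_decomposition(draws_mu, draws_logvar, ddof=1):
    # draws_mu, draws_logvar: shape (n_draws, n_samples, dim_out)
    pred = draws_mu.mean(axis=0)               # predictive mean over draws
    var_a = np.exp(draws_logvar).mean(axis=0)  # aleatoric: mean predicted variance
    var_e = draws_mu.var(axis=0, ddof=ddof)    # epistemic: spread across draws
    return pred, np.stack([var_a, var_e], axis=0)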

def Deterministic_prediction(Inputs, model, ddof, generator=False, type_output=None):
    """Prediction (mu, sigma) of Inputs using a deterministic UQ paradigm (e.g., EDL).

    Args:
        Inputs: model inputs (or a generator over them).
        model (tf.keras.Model): neural network.
        ddof (int): unused here; kept for signature consistency.
        generator (bool, optional): whether Inputs is a generator. Defaults to False.
        type_output (str): "EDL", "PNN", "None" or None.

    Returns:
        tuple: (pred, UQ) with UQ = [var_A, var_E] stacked on a leading axis.
    """

    if generator:
        output = []
        for Inputs_gen, _ in Inputs:  # for each batch, infer and aggregate results
            output.append(model.predict(Inputs_gen))
        output = np.concatenate(output, axis=0)

    else:
        output = model.predict(Inputs)

    if type_output == "EDL":
        gamma, vu, alpha, beta = np.split(output, 4, -1)
        alpha = alpha + 10e-6
        pred = gamma
        var_A = beta / (alpha - 1)
        # WARNING: sqrt or not sqrt?
        var_E = beta / (vu * (alpha - 1))
        if (var_E == np.inf).sum() > 0:
            print("Warning: inf values in var_E replaced by 0")
        if (var_A == np.inf).sum() > 0:
            print("Warning: inf values in var_A replaced by 0")
        var_E[var_E == np.inf] = 0
        var_A[var_A == np.inf] = 0

    elif type_output == "PNN":
        pred, logvar = np.split(output, 2, -1)
        var_A = np.exp(logvar)
        var_E = logvar * 0

    else:
        pred = output
        var_A = 0 * pred
        var_E = 0 * pred

    UQ = np.concatenate([var_A[None, :], var_E[None, :]], axis=0)
    return (pred, UQ)
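

# Illustrative note: for EDL the head outputs the Normal-Inverse-Gamma
# parameters (gamma, nu, alpha, beta), and the moments used above follow
# Amini et al., "Deep Evidential Regression" (NeurIPS 2020):
#   E[mu] = gamma
#   E[sigma^2] = beta / (alpha - 1)      (aleatoric)
#   Var[mu] = beta / (nu * (alpha - 1))  (epistemic)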

def Ensemble_based_prediction(Inputs, models, ddof, generator=False, type_output=None):
    """Prediction (mu, sigma) of Inputs using an ensemble-based UQ paradigm.

    Args:
        Inputs: model inputs (or a generator over them).
        models (list of tf.keras.Model): ensemble of neural networks.
        ddof (int): delta degrees of freedom for the variance estimate.
        generator (bool, optional): whether Inputs is a generator. Defaults to False.
        type_output (str): type_output (currently unused).

    Returns:
        tuple: (pred, UQ) with UQ = [var_A, var_E] stacked on a leading axis.
    """

    if generator:
        pred = []
        var_a = []
        var_e = []
        for Inputs_gen, _ in Inputs:  # for each batch, run every submodel and aggregate
            output = []
            for submodel in models:
                output.append(submodel.predict(Inputs_gen))

            pred_, logvar = np.split(np.array(output), 2, -1)
            var_a.append(np.exp(logvar).mean(axis=0))
            var_e.append(pred_.var(axis=0, ddof=ddof))
            pred.append(pred_.mean(axis=0))

        pred = np.concatenate(pred, axis=0)
        var_a = np.concatenate(var_a, axis=0)
        var_e = np.concatenate(var_e, axis=0)
    else:
        output = []
        for submodel in models:
            output.append(submodel.predict(Inputs))
        pred, logvar = np.split(np.array(output), 2, -1)
        var_a = np.exp(logvar).mean(axis=0)
        var_e = np.var(pred, axis=0, ddof=ddof)
        pred = pred.mean(axis=0)
    UQ = np.concatenate([var_a[None, :], var_e[None, :]], axis=0)
    return (pred, UQ)

def get_training_parameters(
    epochs=[100, 100],
    b_s=[64, 32],
    l_r=[0.005, 0.001],
    list_loss=None,
    metrics=None,
    param_loss=None,
    type_output=None,
    generator=False,
    shuffle=True,
    verbose=1,
    sample_w=None,
    callbacks="default",
    **kwargs
):
    if list_loss is None:
        if type_output is None:
            list_loss = ["MSE"]
            metrics = ["MSE"]
            param_loss = [1]  # output_size passed to the MSE loss builder

        if type_output == "MC_Dropout":
            list_loss = ["MSE", "BNN"]
            metrics = ["MSE", "BNN"]
            param_loss = [2, 0.9]

        if type_output == "Deep_ensemble":
            list_loss = ["MSE", "BNN"]
            metrics = ["MSE", "BNN"]
            param_loss = [2, 0.9]

        if type_output == "EDL":
            list_loss = ["MSE", "EDL", "EDL"]
            metrics = ["MSE", "BNN"]
            param_loss = [4, 1e-2, 10e-2]

    dict_params = {
        "epochs": epochs,
        "b_s": b_s,
        "l_r": l_r,
        "sample_w": sample_w,
        "list_loss": list_loss,
        "metrics": metrics,
        "param_loss": param_loss,
        "generator": generator,
        "shuffle": shuffle,
        "verbose": verbose,
        "callbacks": callbacks,
    }

    dict_params.update(kwargs)
    return dict_params
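

# Example (illustrative): the MC Dropout preset pairs each loss with its
# parameter, so training runs the MSE warm-up (output size 2) before the BNN
# loss (parameter 0.9):
#   params = get_training_parameters(type_output="MC_Dropout", epochs=[50, 50])
#   params["list_loss"]   # -> ["MSE", "BNN"]
#   params["param_loss"]  # -> [2, 0.9]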

def get_params_dict(
    dim_in,
    dim_out=1,
    layers_size=[200, 150, 100],
    regularizer_=(0.0001, 0.0001),
    dp=None,
    name="MLP_UQ",
    type_output="MC_Dropout",
    **kwargs
):
    dict_params = {
        "dim_in": dim_in,
        "dim_out": dim_out,
        "layers_size": layers_size,
        "regularizer_": regularizer_,
        "name": name,
        "n_ech": 5,
        "dp": dp,
        "type_output": type_output,
        "logvar_min": np.log(0.00005),
    }
    if type_output == "MC_Dropout":
        if dp is None:
            dict_params["dp"] = 0.15

    if type_output == "Deep_ensemble":
        dict_params["n_ech"] = 5
        dict_params["k_fold"] = 8
        if dp is None:
            dict_params["dp"] = 0.02

    if type_output == "EDL":
        if dp is None:
            dict_params["dp"] = 0.02

    dict_params.update(kwargs)

    return dict_params
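

# Example (illustrative): default architecture parameters for a deep ensemble,
# assuming a 10-dimensional input and a scalar target.
#   mp = get_params_dict(dim_in=10, dim_out=1, type_output="Deep_ensemble")
#   mp["dp"], mp["n_ech"], mp["k_fold"]  # -> (0.02, 5, 8)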

def generate_K_fold_removing_index(
    n_model, k_fold, train, data_drop, random_state=None
):
    """Generate the lists of indices to remove for the k-fold deep ensemble procedure.

    Args:
        n_model: number of submodels.
        k_fold: number of folds (or None to disable k-fold splitting).
        train: train flag indices.
        data_drop: fraction of training data to drop per submodel.
        random_state: seed controlling the experiment's randomness.

    Returns:
        list: for each submodel, the indices of samples to remove from train.
    """
    list_sampletoremove = []
    if k_fold is not None:
        if k_fold < n_model:
            print("Warning: k_fold is smaller than the number of models")
        # Drop data using KFold + a random drop ratio to add variability to the deep ensemble
        for n_fold, (keep, removed) in enumerate(
            KFold(k_fold, shuffle=True, random_state=random_state).split(train)
        ):
            if data_drop > 0:
                np.random.seed(add_random_state(random_state, n_fold))
                sampletoremove = np.random.choice(
                    keep, int(len(keep) * data_drop), replace=False
                )
                sampletoremove = sorted(np.concatenate([removed, sampletoremove]))
            else:
                sampletoremove = sorted(removed)
            list_sampletoremove.append(sampletoremove)
    else:
        list_sampletoremove = [[] for i in range(n_model)]
        if data_drop > 0:
            for n in range(n_model):
                np.random.seed(add_random_state(random_state, n))
                sampletoremove = np.random.choice(
                    np.arange(len(train)),
                    int(len(train) * data_drop),
                    replace=False,
                )
                list_sampletoremove[n] = sampletoremove
    return list_sampletoremove
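

# Example (illustrative, hypothetical values): three submodels sharing a
# 3-fold split over ten samples; each submodel gets its own drop list.
#   train_flags = np.ones(10, dtype=bool)
#   drops = generate_K_fold_removing_index(
#       n_model=3, k_fold=3, train=train_flags, data_drop=0, random_state=0)
#   len(drops)  # -> 3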

def generate_train_test(len_, train_ratio=0.92, last_val=True, random_state=None):
    if last_val:
        train = np.arange(len_) < train_ratio * len_
    else:
        np.random.seed(random_state)
        train = np.random.rand(len_) < train_ratio
    test = np.invert(train)
    return (train, test)
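

# End-to-end usage sketch (illustrative, not part of the original module). It
# assumes a user-supplied Keras initializer whose head outputs (mu, logvar),
# i.e. 2 * dim_out units, as expected by type_output="MC_Dropout", and that
# the uqmodels loss builders accept the parameters wired in above.
if __name__ == "__main__":

    def mlp_initializer(dim_in, dim_out, layers_size, dp, seed=None, **kwargs):
        # Minimal MC-Dropout MLP: dropout stays active at inference via training=True.
        inputs = tf.keras.Input(shape=(dim_in,))
        h = inputs
        for size in layers_size:
            h = tf.keras.layers.Dense(size, activation="relu")(h)
            h = tf.keras.layers.Dropout(dp)(h, training=True)
        outputs = tf.keras.layers.Dense(2 * dim_out)(h)  # (mu, logvar) head
        return tf.keras.Model(inputs, outputs)

    rng = np.random.default_rng(0)
    X = rng.normal(size=(512, 10)).astype("float32")
    y = (X[:, :1] + 0.1 * rng.normal(size=(512, 1))).astype("float32")

    estimator = NN_UQ(
        model_initializer=mlp_initializer,
        model_parameters=get_params_dict(dim_in=10, dim_out=1, type_output="MC_Dropout"),
        training_parameters=get_training_parameters(
            type_output="MC_Dropout", epochs=[5, 5], verbose=0
        ),
        type_output="MC_Dropout",
        n_ech=5,
    )
    estimator.fit(X, y)
    pred, UQ = estimator.predict(X)  # UQ[0]: aleatoric var, UQ[1]: epistemic var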