# uqmodels/modelization/DL_estimator/neural_network_UQ.py

import copy
import inspect
import os
import random

import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold

import uqmodels.modelization.DL_estimator.loss as uqloss
import uqmodels.processing as uqproc
from uqmodels.modelization.DL_estimator.metalayers import mlp
from uqmodels.modelization.DL_estimator.utils import set_global_determinism
from uqmodels.modelization.UQEstimator import UQEstimator, get_UQEstimator_parameters
from uqmodels.utils import add_random_state, apply_mask, cut, generate_random_state


def Identity_factory(X, y, **kwargs):
    return (X, y, None)


class NN_UQ(UQEstimator):
    """Neural network UQ estimator."""

    def __init__(
        self,
        model_initializer,
        model_parameters,
        factory_parameters=dict(),
        training_parameters=dict(),
        type_output=None,
        rescale=False,
        n_ech=5,
        train_ratio=0.9,
        var_min=0.000001,
        name="NN",
        random_state=None,
    ):
        self.model_initializer = model_initializer

        if random_state is not None:
            params_list = list(inspect.signature(model_initializer).parameters)

            if "seed" in params_list:
                model_parameters["seed"] = random_state

            elif "random_state" in params_list:
                model_parameters["random_state"] = random_state

            else:
                print(
                    'Warning: model_initializer has neither a "seed" nor a '
                    '"random_state" parameter'
                )

        self.model_parameters = model_parameters
        self.factory_parameters = factory_parameters
        self.training_parameters = training_parameters
        self.type_output = type_output
        self.initialized = False
        self.history = []
        self.n_ech = n_ech
        self.train_ratio = train_ratio

        type_UQ = "var_A&E"
        super().__init__(
            name=name,
            type_UQ=type_UQ,
            rescale=rescale,
            var_min=var_min,
            random_state=random_state,
        )
        if self.random_state is not None:
            model_parameters["random_state"] = random_state

        if "generator" not in self.training_parameters.keys():
            self.training_parameters["generator"] = False

        if "test_batch_size" not in self.training_parameters.keys():
            self.training_parameters["test_batch_size"] = 20000

        # Additional deep ensemble parameters
        self.ddof = 1
        if "ddof" in model_parameters.keys():
            self.ddof = model_parameters["ddof"]

        if "train_ratio" in model_parameters.keys():
            self.train_ratio = model_parameters["train_ratio"]

        if "n_ech" in model_parameters.keys():
            self.n_ech = model_parameters["n_ech"]

        self.snapshot = False
        if "snapshot" in model_parameters.keys():
            self.snapshot = model_parameters["snapshot"]

        self.data_drop = 0
        if "data_drop" in model_parameters.keys():
            self.data_drop = model_parameters["data_drop"]

        # Default to None so basic_fit can rely on the attribute existing.
        self.k_fold = None
        if "k_fold" in model_parameters.keys():
            self.k_fold = model_parameters["k_fold"]

    def build_loss(self, loss, param_loss=None):
        """Build a loss from a name or a loss builder, with its parameters.

        Args:
            loss (str or callable): loss identifier ("BNN", "EDL", "MSE")
                or a loss-builder callable.
            param_loss (dict or other, optional): parameters forwarded to the
                loss builder. Defaults to None.

        Returns:
            callable: the built loss.
        """
        if loss == "BNN":
            loss = uqloss.build_BNN_loss
            if param_loss is None:
                param_loss = {}
        elif loss == "EDL":
            loss = uqloss.build_EDL_loss
            if param_loss is None:
                param_loss = {}
        elif loss == "MSE":
            loss = uqloss.build_MSE_loss
            if param_loss is None:
                param_loss = {}

        if param_loss is not None:
            if isinstance(param_loss, dict):
                loss = loss(**param_loss)
            else:
                loss = loss(param_loss)

        return loss

    def build_metrics(self, metrics):
        """Build a list of metrics from names ("MSE", "BNN") or metric callables.

        Args:
            metrics (list): metric names or already-built metric callables.
        """
        list_metrics = []
        for metric in metrics:
            if metric == "MSE":
                output_size = 1
                if self.type_output in ["MC_Dropout", "Deep_ensemble"]:
                    output_size = 2
                elif self.type_output in ["EDL"]:
                    output_size = 4
                metric = uqloss.build_MSE_loss(output_size, metric=True)
            elif metric == "BNN":
                metric = uqloss.build_BNN_loss(
                    0.95, metric=True, type_output=self.type_output
                )
            list_metrics.append(metric)
        return list_metrics

    def _format(self, X, y, type_transform, mode_UQ=False):
        X, y = super()._format(X, y, type_transform=type_transform, mode_UQ=mode_UQ)
        return (X, y)

    def factory(self, X, y, mask=None, cut_param=None, only_fit_scaler=False):
        if y is not None:
            self.y_shape = y.shape
            if cut_param is not None:
                print("cutting target")
                min_cut, max_cut = cut_param
                y = cut(y, min_cut, max_cut)

        if self.rescale:
            X, y = self._format(X, y, type_transform="fit_transform")

        if only_fit_scaler:
            return ()

        # X, y = [X], y

        return (X, y, mask)

    def save(self, path=None, name=None):
        if name is None:
            name = self.name

        if self.type_output == "Deep_ensemble":
            for n, model in enumerate(self.model):
                cur_name = name + "_" + str(n)
                new_path = os.path.join(path, cur_name)
                model.save_weights(new_path)
        else:
            new_path = os.path.join(path, name)
            self.model.save_weights(new_path)

        # Swap the (non-serializable) model out before writing the attributes.
        model_tmp = self.model
        self.model = True
        dict_parameters = self.__dict__
        uqproc.write(path, [name + "_params"], dict_parameters)
        self.model = model_tmp

    def load(self, path, name=None):
        # Temporarily silence TensorFlow logging while rebuilding the network.
        old_level_info = os.environ.get("TF_CPP_MIN_LOG_LEVEL", "0")
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
        if name is None:
            name = self.name

        dict_parameters = uqproc.read(path, [name + "_params"])
        for attribute, value in dict_parameters.items():
            self.__setattr__(attribute, value)

        self.init_neural_network()
        if self.type_output == "Deep_ensemble":
            for n, model in enumerate(self.model):
                new_path = os.path.join(path, name + "_" + str(n))
                model.load_weights(new_path)
        else:
            new_path = os.path.join(path, name)
            self.model.load_weights(new_path)
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = old_level_info
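    # Hedged usage sketch (hypothetical paths and names, not part of the API):
    # save/load round-trip the per-model weights plus the attribute dict
    # written through uqproc.write:
    #
    #     estimator.save(path="models/", name="my_nn")
    #     restored = NN_UQ(model_initializer, model_parameters)
    #     restored.load(path="models/", name="my_nn")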

    def compile(self, step=0, optimizer=None, loss=None, metrics=None, **kwarg):
        if optimizer is None:
            l_r = self.training_parameters["l_r"][step]
            optimizer = tf.keras.optimizers.experimental.Nadam(learning_rate=l_r)
        kwarg["optimizer"] = optimizer

        if loss is None:
            loss_ = self.training_parameters["list_loss"][step]
            param_loss_current = self.training_parameters["param_loss"][step]
            loss = self.build_loss(loss_, param_loss_current)
        kwarg["loss"] = loss

        if metrics is None:
            metrics = self.training_parameters["metrics"]
        kwarg["metrics"] = metrics
        self.model.compile(**kwarg)

    def modify_dropout(self, dp):
        # Rebuild the network with a new dropout rate, keeping the current weights.
        self.model.save_weights(self.name)
        self.model_parameters["dp"] = dp
        self.model = self.model_initializer(**self.model_parameters)
        self.initialized = True
        self.model.load_weights(self.name)

    def reset(self):
        del self.model
        self.initialized = False

    def init_neural_network(self):
        "Apply model_initializer to model_parameters and store the result in self.model."
        if self.random_state is not None:
            set_global_determinism(seed=self.random_state)

        if self.type_output == "Deep_ensemble":
            # Deep ensemble: one independently initialized network per member.
            self.model = []
            for i in range(self.n_ech):
                self.model.append(self.model_initializer(**self.model_parameters))
        else:
            self.model = self.model_initializer(**self.model_parameters)
        self.initialized = True
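    # Minimal sketch of the model_initializer contract assumed here: a callable
    # mapping keyword parameters to an uncompiled tf.keras model whose output
    # layout matches type_output (hypothetical example, not part of the library):
    #
    #     def my_initializer(dim_in, dim_out, **kwargs):
    #         inputs = tf.keras.Input((dim_in,))
    #         outputs = tf.keras.layers.Dense(2 * dim_out)(inputs)  # mean + logvar
    #         return tf.keras.Model(inputs, outputs)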

    def fit(
        self,
        Inputs,
        Targets,
        train=None,
        test=None,
        training_parameters=None,
        verbose=None,
        **kwargs
    ):

        print("start_fit")

        if training_parameters is None:
            training_parameters = copy.deepcopy(self.training_parameters)
        if verbose is not None:
            training_parameters["verbose"] = verbose

        if not (self.initialized) or not hasattr(self, "model"):
            self.init_neural_network()

        if train is None:
            last_val = False
            if hasattr(self, "model_parameters") and (
                "size_window" in self.model_parameters.keys()
            ):
                last_val = True

            train, test = generate_train_test(
                len_=len(Targets),
                train_ratio=self.train_ratio,
                last_val=last_val,
                random_state=self.random_state,
            )

        if test is None:
            test = np.invert(train)

        # Use the local copy so a verbose override is actually applied.
        history = self.basic_fit(Inputs, Targets, train, test, **training_parameters)

        for step_history in history:
            self.history.append(step_history)

    # Basic fit function

    def basic_fit(
        self,
        Inputs,
        Targets,
        train=None,
        test=None,
        epochs=[1000, 1000],
        b_s=[100, 20],
        l_r=[0.01, 0.005],
        sample_w=None,
        verbose=1,
        list_loss=["mse"],
        metrics=None,
        generator=None,
        steps_per_epoch=None,
        shuffle=True,
        callbacks="default",
        validation_freq=1,
        param_loss=None,
        test_batch_size=None,
        **kwargs
    ):

        # Training function
        list_history = []

        if self.random_state is not None:
            set_global_determinism(seed=self.random_state)

        if generator is None:
            generator = self.training_parameters["generator"]

        if test_batch_size is None:
            test_batch_size = self.training_parameters["test_batch_size"]

        if train is None:
            last_val = False
            if hasattr(self, "model_parameters") and (
                "size_window" in self.model_parameters.keys()
            ):
                last_val = True

            train, test = generate_train_test(
                len_=len(Targets),
                train_ratio=self.train_ratio,
                last_val=last_val,
                random_state=self.random_state,
            )

        if test is None:
            test = np.invert(train)

        if not (hasattr(self, "scaler")):
            _ = self.factory(Inputs, Targets, only_fit_scaler=True)

        n_models = 1
        if self.type_output == "Deep_ensemble":
            n_models = self.n_ech
            list_sampletoremove = generate_K_fold_removing_index(
                n_models,
                k_fold=self.k_fold,
                train=train,
                data_drop=self.data_drop,
                random_state=self.random_state,
            )

        for n_model in range(n_models):
            train_ = np.copy(train)
            test_ = np.copy(test)

            # Deep ensemble: per-submodel dataset differentiation if k-fold is activated
            if self.type_output == "Deep_ensemble":
                train_[list_sampletoremove[n_model]] = False
                test_[list_sampletoremove[n_model]] = True

            for n, loss in enumerate(list_loss):
                for i, (batch_size, learning_rate) in enumerate(zip(b_s, l_r)):

                    loss_ = self.build_loss(
                        loss, param_loss[n] if param_loss is not None else None
                    )
                    metrics_ = None
                    if metrics is not None:
                        metrics_ = self.build_metrics(metrics)

                    if self.type_output == "Deep_ensemble":
                        if self.snapshot and (n_model > 0):
                            # Snapshot mode: restart each member from a clone of
                            # the first one.
                            self.model[n_model] = tf.keras.models.clone_model(
                                self.model[0]
                            )

                        current_model = self.model[n_model]

                    else:
                        current_model = self.model

                    current_model.compile(
                        optimizer=tf.keras.optimizers.experimental.Nadam(
                            learning_rate=learning_rate
                        ),
                        loss=loss_,
                        metrics=metrics_,
                    )

                    (
                        In_,
                        Tar_,
                        validation_data_,
                        validation_steps,
                        steps_per_epoch,
                        batch_size,
                    ) = self.dataset_generator(
                        Inputs=apply_mask(Inputs, train_),
                        Targets=apply_mask(Targets, train_),
                        validation_data=(
                            apply_mask(Inputs, test_),
                            apply_mask(Targets, test_),
                        ),
                        batch_size=batch_size,
                        generator=generator,
                        shuffle=shuffle,
                        test_batch_size=test_batch_size,
                    )

                    if callbacks == "default":
                        callbacks = uqloss.default_callbacks()

                    history = current_model.fit(
                        x=In_,
                        y=Tar_,
                        validation_data=validation_data_,
                        epochs=epochs[i],
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps,
                        batch_size=batch_size,
                        sample_weight=sample_w,
                        shuffle=shuffle,
                        callbacks=callbacks,
                        validation_freq=validation_freq,
                        verbose=verbose,
                    )

                    # Drop the compiled state (losses are rebuilt at each step).
                    current_model.compile()

                    list_history.append(history)

        return list_history
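    # The training schedule above runs, for each loss in list_loss, one fit per
    # (batch_size, learning_rate) pair, using epochs[i] for pair i. A hedged
    # example of the expected training_parameters (values illustrative only):
    #
    #     training_parameters = get_training_parameters(
    #         epochs=[50, 50], b_s=[64, 32], l_r=[0.005, 0.001],
    #         type_output="MC_Dropout",
    #     )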

    def dataset_generator(
        self,
        Inputs,
        Targets,
        validation_data,
        batch_size,
        shuffle,
        generator,
        test_batch_size=None,
    ):
        """Handle both cases: with or without a data generator.

        Args:
            Inputs: model inputs.
            Targets: training targets.
            validation_data: (inputs, targets) tuple used for validation.
            batch_size: training batch size.
            shuffle: whether to shuffle the data.
            generator: whether to wrap the data in a generator.
            test_batch_size: validation batch size (generator mode only).

        Returns:
            tuple: (In_, Tar_, validation_data_, validation_steps,
            steps_per_epoch, batch_size) ready to be passed to model.fit.
        """
        if generator:
            In_ = self.Build_generator(
                Inputs, Targets, batch=batch_size, shuffle=shuffle, train=True
            )
            Tar_ = None

            if test_batch_size is None:
                test_batch_size = self.training_parameters["test_batch_size"]

            validation_data_ = self.Build_generator(
                validation_data[0],
                validation_data[1],
                batch=test_batch_size,
                shuffle=False,
                train=True,
            )

            steps_per_epoch = len(In_)
            validation_steps = len(validation_data_)
            batch_size = None
        else:
            In_ = Inputs
            Tar_ = Targets
            validation_data_ = validation_data
            validation_steps = None
            steps_per_epoch = None
        return (
            In_,
            Tar_,
            validation_data_,
            validation_steps,
            steps_per_epoch,
            batch_size,
        )
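    # In generator mode, model.fit consumes the Sequence directly: targets and
    # batch_size are None and the step counts come from len(generator). In
    # array mode the data passes through untouched and Keras handles batching.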

    def predict(self, X, type_output=None, generator=None, **kwargs):
        if type_output is None:
            type_output = self.type_output

        pred, UQ = self.basic_predict(
            X, n_ech=self.n_ech, type_output=type_output, generator=generator, **kwargs
        )

        if self.rescale:
            _, pred = self._format(None, pred, type_transform="inverse_transform")
            _, UQ = self._format(
                None, UQ, type_transform="inverse_transform", mode_UQ=True
            )

        return (pred, UQ)
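    # Output convention (type_UQ = "var_A&E"): predict returns (pred, UQ) where
    # UQ stacks the aleatoric variance UQ[0] and the epistemic variance UQ[1],
    # each shaped like pred. Hedged usage sketch:
    #
    #     pred, UQ = estimator.predict(X)
    #     var_A, var_E = UQ[0], UQ[1]
    #     sigma = np.sqrt(var_A + var_E)  # total predictive std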

    def basic_predict(
        self,
        Inputs,
        n_ech=6,
        type_output="MC_Dropout",
        generator=None,
        test_batch_size=None,
        **kwarg
    ):
        # Variational prediction + variance estimation for steps T+1 and T+4 (lag)
        if generator is None:
            generator = self.training_parameters["generator"]

        if self.random_state is not None:
            set_global_determinism(seed=self.random_state)

        if generator:
            if test_batch_size is None:
                test_batch_size = self.training_parameters["test_batch_size"]
            Inputs = self.Build_generator(
                Inputs, None, batch=test_batch_size, shuffle=False, train=False
            )

        if type_output in ["MC_Dropout", "MC_Dropout_no_PNN"]:
            pred, UQ = Drawn_based_prediction(
                Inputs,
                self.model,
                n_ech,
                ddof=self.ddof,
                generator=generator,
                type_output=type_output,
            )

        elif type_output == "Deep_ensemble":
            pred, UQ = Ensemble_based_prediction(
                Inputs,
                self.model,
                ddof=self.ddof,
                generator=generator,
                type_output=type_output,
            )

        elif type_output in ["EDL", "PNN", "None", None]:
            pred, UQ = Deterministic_prediction(
                Inputs,
                self.model,
                ddof=self.ddof,
                generator=generator,
                type_output=type_output,
            )

        else:
            raise Exception(
                "Unknown type_output: choose 'MC_Dropout', 'MC_Dropout_no_PNN', "
                "'Deep_ensemble', 'EDL', 'PNN', 'None' or None"
            )

        return (pred, UQ)

    def Build_generator(self, X, y, batch=32, shuffle=True, train=True):
        return default_Generator(X, y, self, batch=batch, shuffle=shuffle, train=train)


def Drawn_based_prediction(
    Inputs, model, n_ech, ddof, generator=False, type_output="MC_Dropout"
):
    """Prediction (mu, sigma) of Inputs using a draw-based UQ paradigm (e.g. MC Dropout).

    Args:
        Inputs: inputs of the model (or a generator over them).
        model (tf.model): neural network.
        n_ech (int): number of stochastic forward passes (dropout draws).
        ddof (int): delta degrees of freedom for the variance estimate.
        generator (bool, optional): specify if Inputs is a generator or not.
            Defaults to False.
        type_output (str): "MC_Dropout" (PNN head) or "MC_Dropout_no_PNN".

    Returns:
        tuple: (pred, UQ).
    """

    if generator:
        pred = []
        var_a = []
        var_e = []
        for Inputs_gen, _ in Inputs:  # per batch: n_ech inferences, then aggregate
            output = []
            for i in range(n_ech):
                output.append(model.predict(Inputs_gen))

            if type_output == "MC_Dropout_no_PNN":
                pred_ = np.array(output)
                var_a.append(0 * pred_.mean(axis=0))

            if type_output == "MC_Dropout":
                pred_, logvar = np.split(np.array(output), 2, -1)
                var_a.append(np.exp(logvar).mean(axis=0))

            pred.append(pred_.mean(axis=0))
            # ddof kept consistent with the non-generator branch below.
            var_e.append(pred_.var(axis=0, ddof=ddof))

        pred = np.concatenate(pred, axis=0)
        var_a = np.concatenate(var_a, axis=0)
        var_e = np.concatenate(var_e, axis=0)
        UQ = np.concatenate([var_a[None, :], var_e[None, :]], axis=0)
    else:
        output = []
        for i in range(n_ech):
            output.append(model.predict(Inputs))

        if type_output == "MC_Dropout_no_PNN":
            pred_ = np.array(output)
            var_a = 0 * pred_.mean(axis=0)

        if type_output == "MC_Dropout":
            pred_, logvar = np.split(np.array(output), 2, -1)
            var_a = np.exp(logvar).mean(axis=0)
        var_e = np.var(pred_, axis=0, ddof=ddof)
        pred = pred_.mean(axis=0)
        UQ = np.concatenate([var_a[None, :], var_e[None, :]], axis=0)
    return (pred, UQ)

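# Note: the MC-Dropout decomposition above follows the law of total variance,
# Var[y] = E[sigma^2] + Var[mu]: var_a averages the predicted variances over
# the draws (aleatoric part) and var_e is the variance of the predicted means
# across draws (epistemic part).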

def Deterministic_prediction(Inputs, model, ddof, generator=False, type_output=None):
    """Prediction (mu, sigma) of Inputs using a deterministic UQ paradigm (e.g. EDL).

    Args:
        Inputs: inputs of the model (or a generator over them).
        model (tf.model): neural network.
        ddof (int): delta degrees of freedom (unused here).
        generator (bool, optional): specify if Inputs is a generator or not.
            Defaults to False.
        type_output: "EDL", "PNN" or None.

    Returns:
        tuple: (pred, UQ).
    """

    if generator:
        output = []
        for Inputs_gen, _ in Inputs:  # per batch: inference, then aggregate
            output.append(model.predict(Inputs_gen))
        output = np.concatenate(output, axis=0)

    else:
        output = model.predict(Inputs)

    if type_output == "EDL":
        # Normal-Inverse-Gamma head: recover the evidential moments.
        gamma, vu, alpha, beta = np.split(output, 4, -1)
        alpha = alpha + 10e-6
        pred = gamma
        var_A = beta / (alpha - 1)  # aleatoric: E[sigma^2]
        # WARNING: sqrt or not sqrt?
        var_E = beta / (vu * (alpha - 1))  # epistemic: Var[mu]
        if (var_E == np.inf).sum() > 0:
            print("Warning: inf values in var_E replaced by 0")
        if (var_A == np.inf).sum() > 0:
            print("Warning: inf values in var_A replaced by 0")
        var_E[var_E == np.inf] = 0
        var_A[var_A == np.inf] = 0

    elif type_output == "PNN":
        pred, logvar = np.split(output, 2, -1)
        var_A = np.exp(logvar)
        var_E = logvar * 0

    else:
        pred = output
        var_A = 0 * pred
        var_E = 0 * pred

    # Aleatoric first, to match the draw-based and ensemble helpers.
    UQ = np.concatenate([var_A[None, :], var_E[None, :]], axis=0)
    return (pred, UQ)


def Ensemble_based_prediction(Inputs, models, ddof, generator=False, type_output=None):
    """Prediction (mu, sigma) of Inputs using an ensemble-based UQ paradigm.

    Args:
        Inputs: inputs of the models (or a generator over them).
        models (list of tf.model): ensemble members.
        ddof (int): delta degrees of freedom for the variance estimate.
        generator (bool, optional): specify if Inputs is a generator or not.
            Defaults to False.
        type_output: type_output (currently unused).

    Returns:
        tuple: (pred, UQ).
    """

    if generator:
        pred = []
        var_a = []
        var_e = []
        for Inputs_gen, _ in Inputs:  # per batch: one inference per member, then aggregate
            output = []
            for submodel in models:
                output.append(submodel.predict(Inputs_gen))

            pred_, logvar = np.split(np.array(output), 2, -1)
            var_a.append(np.exp(logvar).mean(axis=0))
            var_e.append(pred_.var(axis=0, ddof=ddof))
            pred.append(pred_.mean(axis=0))

        pred = np.concatenate(pred, axis=0)
        var_a = np.concatenate(var_a, axis=0)
        var_e = np.concatenate(var_e, axis=0)
    else:
        output = []
        for submodel in models:
            output.append(submodel.predict(Inputs))
        pred, logvar = np.split(np.array(output), 2, -1)
        var_a = np.exp(logvar).mean(axis=0)
        var_e = np.var(pred, axis=0, ddof=ddof)
        pred = pred.mean(axis=0)
    UQ = np.concatenate([var_a[None, :], var_e[None, :]], axis=0)
    return (pred, UQ)

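# Note: the ensemble aggregation above treats the members as an equally
# weighted Gaussian mixture: pred is the mean of the member means, var_a the
# mean of the member (PNN-head) variances, and var_e the variance of the
# member means across the ensemble.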

def get_training_parameters(
    epochs=[100, 100],
    b_s=[64, 32],
    l_r=[0.005, 0.001],
    list_loss=None,
    metrics=None,
    param_loss=None,
    type_output=None,
    generator=False,
    shuffle=True,
    verbose=1,
    sample_w=None,
    callbacks="default",
    **kwargs
):
    if list_loss is None:
        if type_output is None:
            list_loss = ["MSE"]
            metrics = ["MSE"]

        if type_output == "MC_Dropout":
            list_loss = ["MSE", "BNN"]
            metrics = ["MSE", "BNN"]
            param_loss = [2, 0.9]

        if type_output == "Deep_ensemble":
            list_loss = ["MSE", "BNN"]
            metrics = ["MSE", "BNN"]
            param_loss = [2, 0.9]

        if type_output == "EDL":
            list_loss = ["MSE", "EDL", "EDL"]
            metrics = ["MSE", "BNN"]
            param_loss = [4, 1e-2, 10e-2]

    dict_params = {
        "epochs": epochs,
        "b_s": b_s,
        "l_r": l_r,
        "sample_w": sample_w,
        "list_loss": list_loss,
        "metrics": metrics,
        "param_loss": param_loss,
        "generator": generator,
        "shuffle": shuffle,
        "verbose": verbose,
        "callbacks": callbacks,
    }

    for key_arg in kwargs.keys():
        dict_params[key_arg] = kwargs[key_arg]
    return dict_params

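# Hedged example: with type_output="MC_Dropout" the defaults above become
# list_loss=["MSE", "BNN"] and param_loss=[2, 0.9], i.e. basic_fit first
# trains on the "MSE" loss and then continues training on the "BNN" loss.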

def get_params_dict(
    dim_in,
    dim_out=1,
    layers_size=[200, 150, 100],
    regularizer_=(0.0001, 0.0001),
    dp=None,
    name="MLP_UQ",
    type_output="MC_Dropout",
    **kwargs
):

    dict_params = {
        "dim_in": dim_in,
        "dim_out": dim_out,
        "layers_size": layers_size,
        "regularizer_": regularizer_,
        "name": name,
        "n_ech": 5,
        "dp": dp,
        "type_output": type_output,
        "logvar_min": np.log(0.00005),
    }
    if type_output == "MC_Dropout":
        if dp is None:
            dict_params["dp"] = 0.15

    if type_output == "Deep_ensemble":
        dict_params["n_ech"] = 5
        dict_params["k_fold"] = 8
        if dp is None:
            dict_params["dp"] = 0.02

    if type_output == "EDL":
        if dp is None:
            dict_params["dp"] = 0.02

    for key_arg in kwargs.keys():
        dict_params[key_arg] = kwargs[key_arg]

    return dict_params


class default_Generator(tf.keras.utils.Sequence):
    def __init__(
        self, X, y, metamodel, batch=64, shuffle=True, train=True, random_state=None
    ):
        self.X = X
        self.y = y
        # y may be None at prediction time; fall back on X for the length.
        self.len_ = len(y) if y is not None else len(X)
        self.train = train
        self.random_state = random_state
        self.shuffle = shuffle
        self.batch = batch
        self.n_batch = int(np.ceil(self.len_ / batch))

        # self.scaler = metamodel.scaler
        self.factory = metamodel.factory
        self._format = metamodel._format
        self.rescale = metamodel.rescale

        self.indices = np.arange(self.n_batch)
        if shuffle:
            np.random.seed(self.random_state)
            np.random.shuffle(self.indices)

    def load(self, idx):
        # Slice the raw data corresponding to batch idx.
        seuil_min = idx * self.batch
        seuil_max = min(seuil_min + self.batch, self.len_)
        Inputs = self.X[seuil_min:seuil_max]
        Targets = self.y[seuil_min:seuil_max] if self.y is not None else None
        return (Inputs, Targets)

    def __len__(self):
        # Number of batches per epoch (tf.keras.utils.Sequence contract).
        return self.n_batch

    def __getitem__(self, idx):
        if self.shuffle:
            idx = self.indices[idx]

        x, y = self.load(idx)
        Inputs, Outputs, _ = self.factory(x, y)
        return Inputs, Outputs

    # Reshuffle the batch order at the end of each epoch.
    def on_epoch_end(self):
        if self.shuffle:
            np.random.seed(self.random_state)
            np.random.shuffle(self.indices)

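# Note: this generator assumes the metamodel factory can be applied batch by
# batch (as Identity_factory can). Shuffling permutes whole batches rather
# than individual samples, so each batch stays a contiguous slice of the data.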

def generate_K_fold_removing_index(
    n_model, k_fold, train, data_drop, random_state=None
):
    """Generate the list of indices to remove for the k-fold deep ensemble procedure.

    Args:
        n_model: number of models.
        k_fold: number of folds.
        train: train flag indices.
        data_drop: ratio of data to drop.
        random_state: handle experimental randomness using a seed.

    Returns:
        list: for each submodel, the indices of the samples to remove from its
        train set.
    """
    list_sampletoremove = []
    if k_fold is not None:
        if k_fold < n_model:
            print("Warning: k_fold is smaller than the number of models")
        # Drop data using KFold + a random drop ratio to add variability to the
        # deep ensemble.
        for n_fold, (keep, removed) in enumerate(
            KFold(k_fold, shuffle=True, random_state=random_state).split(train)
        ):
            if data_drop > 0:
                np.random.seed(add_random_state(random_state, n_fold))
                sampletoremove = np.random.choice(
                    keep, int(len(keep) * data_drop), replace=False
                )
                sampletoremove = sorted(np.concatenate([removed, sampletoremove]))
                list_sampletoremove.append(sampletoremove)
            else:
                list_sampletoremove.append([])
    else:
        list_sampletoremove = [[] for i in range(n_model)]
        if data_drop > 0:
            for n, i in enumerate(list_sampletoremove):
                np.random.seed(add_random_state(random_state, n))
                sampletoremove = np.random.choice(
                    np.arange(len(train)),
                    int(len(train) * data_drop),
                    replace=False,
                )
                list_sampletoremove[n] = sampletoremove
    return list_sampletoremove

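# Hedged example: with n_model=5, k_fold=5 and data_drop=0.1, each submodel's
# training mask drops one distinct fold (1/5 of the data) plus a further 10%
# of the remaining samples drawn at random, so every ensemble member trains on
# a different subsample.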

def generate_train_test(len_, train_ratio=0.92, last_val=True, random_state=None):
    if last_val:
        # Keep the chronological tail as validation (windowed/time-series mode).
        train = np.arange(len_) < train_ratio * len_
    else:
        np.random.seed(random_state)
        train = np.random.rand(len_) < train_ratio
    test = np.invert(train)
    return (train, test)
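

# Hedged smoke-test sketch (not part of the library): exercises NN_UQ with a
# small hypothetical Keras initializer and type_output=None, assuming the
# "MSE" loss builder in uqmodels accepts its default arguments.
if __name__ == "__main__":

    def demo_initializer(dim_in, dim_out, random_state=None):
        # Plain regression head: one output per target dimension.
        inputs = tf.keras.Input((dim_in,))
        hidden = tf.keras.layers.Dense(32, activation="relu")(inputs)
        outputs = tf.keras.layers.Dense(dim_out)(hidden)
        return tf.keras.Model(inputs, outputs)

    rng = np.random.default_rng(0)
    X = rng.normal(size=(256, 4)).astype("float32")
    y = (X.sum(axis=1, keepdims=True) + 0.1 * rng.normal(size=(256, 1))).astype(
        "float32"
    )

    estimator = NN_UQ(
        demo_initializer,
        {"dim_in": 4, "dim_out": 1},
        training_parameters=get_training_parameters(
            epochs=[2, 2], b_s=[32, 32], l_r=[1e-2, 1e-3], verbose=0
        ),
        type_output=None,
        random_state=0,
    )
    estimator.fit(X, y)
    pred, UQ = estimator.predict(X)  # UQ is zeros for a deterministic head
    print(pred.shape, UQ.shape)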