Coverage for uqmodels/modelization/DL_estimator/neural_network_UQ.py: 72%
467 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-05 14:29 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-05 14:29 +0000
1import copy
2import inspect
3import os
4import random
6import numpy as np
7import tensorflow as tf
8from sklearn.model_selection import KFold
10import uqmodels.modelization.DL_estimator.loss as uqloss
11import uqmodels.processing as uqproc
12from uqmodels.modelization.DL_estimator.metalayers import mlp
13from uqmodels.modelization.DL_estimator.utils import set_global_determinism
14from uqmodels.modelization.UQEstimator import UQEstimator, get_UQEstimator_parameters
15from uqmodels.utils import add_random_state, apply_mask, cut, generate_random_state
18def Identity_factory(X, y, **kwargs):
19 return (X, y, None)
22class NN_UQ(UQEstimator):
23 "Neural Network UQ"
25 def __init__(
26 self,
27 model_initializer,
28 model_parameters,
29 factory_parameters=dict(),
30 training_parameters=dict(),
31 type_output=None,
32 rescale=False,
33 n_ech=5,
34 train_ratio=0.9,
35 var_min=0.000001,
36 name="NN",
37 random_state=None,
38 ):
39 self.model_initializer = model_initializer
41 if random_state is not None:
42 params_list = list(inspect.signature(model_initializer).parameters)
44 if "seed" in params_list:
45 model_parameters["seed"] = random_state
47 elif "random_state" in params_list:
48 model_parameters["random_state"] = random_state
50 else:
51 print(
52 'Warning model_initializer have not "seed" or "random_state" parameters'
53 )
55 self.model_parameters = model_parameters
56 self.factory_parameters = factory_parameters
57 self.training_parameters = training_parameters
58 self.type_output = type_output
59 self.initialized = False
60 self.history = []
61 self.n_ech = n_ech
62 self.train_ratio = train_ratio
64 type_UQ = "var_A&E"
65 super().__init__(
66 name=name,
67 type_UQ=type_UQ,
68 rescale=rescale,
69 var_min=var_min,
70 random_state=random_state,
71 )
72 if self.random_state is not None:
73 model_parameters["random_state"] = random_state
75 if "generator" not in self.training_parameters.keys():
76 self.training_parameters["generator"] = False
78 if "test_batch_size" not in self.training_parameters.keys():
79 self.training_parameters["test_batch_size"] = 20000
81 # Additional deep ensemble parameter
82 self.ddof = 1
83 if "ddof" in model_parameters.keys():
84 self.ddof = model_parameters["ddof"]
86 if "train_ratio" in model_parameters.keys():
87 self.train_ratio = model_parameters["train_ratio"]
89 if "n_ech" in model_parameters.keys():
90 self.n_ech = model_parameters["n_ech"]
92 self.snapshot = False
93 if "snapshot" in model_parameters.keys():
94 self.snapshot = model_parameters["snapshot"]
96 self.data_drop = 0
97 if "data_drop" in model_parameters.keys():
98 self.data_drop = model_parameters["data_drop"]
100 if "k_fold" in model_parameters.keys():
101 self.k_fold = model_parameters["k_fold"]
103 def build_loss(self, loss, param_loss=None):
104 """Build loss from str or loss and loss_parameters
106 Args:
107 loss (_type_): _description_
108 param_loss (_type_, optional): _description_. Defaults to None.
110 Returns:
111 _type_: _description_
112 """
113 if loss == "BNN":
114 loss = uqloss.build_BNN_loss
115 if param_loss is None:
116 param_loss = {}
117 elif loss == "EDL":
118 loss = uqloss.build_EDL_loss
119 if param_loss is None:
120 param_loss = {}
121 elif loss == "MSE":
122 loss = uqloss.build_MSE_loss
123 if param_loss is None:
124 param_loss = {}
125 else:
126 pass
128 if param_loss is not None:
129 if isinstance(param_loss, dict):
130 loss = loss(**param_loss)
132 else:
133 loss = loss(param_loss)
134 else:
135 loss = loss
137 return loss
139 def build_metrics(self, metrics):
140 """Build list of metrics from str or metrics.
142 Args:
143 metrics (_type_): _description_
144 """
145 list_metrics = []
146 for metric in metrics:
147 if metric == "MSE":
148 output_size = 1
149 if self.type_output in ["MC_Dropout", "Deep_ensemble"]:
150 output_size = 2
151 elif self.type_output in ["EDL"]:
152 output_size = 4
153 metric = uqloss.build_MSE_loss(output_size, metric=True)
154 elif metric == "BNN":
155 metric = uqloss.build_BNN_loss(
156 0.95, metric=True, type_output=self.type_output
157 )
158 else:
159 pass
160 list_metrics.append(metric)
161 return list_metrics
163 def _format(self, X, y, type_transform, mode_UQ=False):
164 X, y = super()._format(X, y, type_transform=type_transform, mode_UQ=mode_UQ)
165 return (X, y)
167 def factory(self, X, y, mask=None, cut_param=None, only_fit_scaler=False):
168 if y is not None:
169 self.y_shape = y.shape
170 if cut_param is None:
171 y = y
173 else:
174 print("cuting_target")
175 min_cut, max_cut = cut_param
176 y = cut(y, min_cut, max_cut)
178 if self.rescale:
179 X, y = self._format(X, y, type_transform="fit_transform")
181 if only_fit_scaler:
182 return ()
184 # X, y = [X], y
186 return (X, y, mask)
188 def save(self, path=None, name=None):
189 if name is None:
190 name = self.name
192 if self.type_output == "Deep_ensemble":
193 for n, model in enumerate(self.model):
194 cur_name = name + "_" + str(n)
195 new_path = os.path.join(path, cur_name)
196 model.save_weights(new_path)
197 else:
198 new_path = os.path.join(path, name)
199 self.model.save_weights(new_path)
201 model_tmp = self.model
202 self.model = True
203 dict_parameters = self.__dict__
204 uqproc.write(path, [name + "_params"], dict_parameters)
205 self.model = model_tmp
207 def load(self, path, name=None):
208 old_level_info = os.environ["TF_CPP_MIN_LOG_LEVEL"]
209 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
210 if name is None:
211 name = self.name
213 dict_parameters = uqproc.read(path, [name + "_params"])
214 for attributes, values in dict_parameters.items():
215 self.__setattr__(attributes, values)
217 self.init_neural_network()
218 if self.type_output == "Deep_ensemble":
219 for n, model in enumerate(self.model):
220 new_path = os.path.join(path, name + "_" + str(n))
221 model.load_weights(new_path)
222 else:
223 new_path = os.path.join(path, name)
224 self.model.load_weights(new_path)
225 os.environ["TF_CPP_MIN_LOG_LEVEL"] = old_level_info
227 def compile(self, step=0, optimizer=None, loss=None, metrics=None, **kwarg):
228 if optimizer is None:
229 l_r = self.training_parameters["l_r"][step]
230 optimizer = tf.keras.optimizers.experimental.Nadam(learning_rate=l_r)
231 kwarg["optimizer"] = optimizer
233 if loss is None:
234 loss_ = self.training_parameters["list_loss"][step]
235 param_loss_current = self.training_parameters["param_loss"][step]
236 loss = self.build_loss(loss_, param_loss_current)
237 kwarg["loss"] = loss
239 if metrics is None:
240 metrics = self.training_parameters["metrics"]
241 kwarg["metrics"] = metrics
242 self.model.compile(**kwarg)
244 def modify_dropout(self, dp):
245 self.model.save_weights(self.name)
246 self.model_parameters["dp"] = dp
247 self.model = self.model_initializer(**self.model_parameters)
248 self.initialized = True
249 self.model.load_weights(self.name)
251 def reset(self):
252 del self.model
253 self.initialized = False
255 def init_neural_network(self):
256 "apply model_initializer function with model_parameters and store in self.model"
257 if self.random_state is not None:
258 set_global_determinism(seed=self.random_state)
260 if self.type_output == "Deep_ensemble":
261 self.model = []
262 for i in range(self.n_ech):
263 self.model.append(self.model_initializer(**self.model_parameters))
264 else:
265 self.model = self.model_initializer(**self.model_parameters)
266 self.initialized = True
268 def fit(
269 self,
270 Inputs,
271 Targets,
272 train=None,
273 test=None,
274 training_parameters=None,
275 verbose=None,
276 **kwargs
277 ):
279 print("start_fit")
281 if training_parameters is None:
282 training_parameters = copy.deepcopy(self.training_parameters)
283 if verbose is not None:
284 training_parameters["verbose"] = verbose
286 if not (self.initialized) or not hasattr(self, "model"):
287 self.init_neural_network()
289 if train is None:
290 last_val = False
291 if hasattr(self, "model_parameters") and (
292 "size_window" in self.model_parameters.keys()
293 ):
294 last_val = True
296 train, test = generate_train_test(
297 len_=len(Targets),
298 train_ratio=self.train_ratio,
299 last_val=last_val,
300 random_state=self.random_state,
301 )
303 if test is None:
304 test = np.invert(train)
306 history = self.basic_fit(
307 Inputs, Targets, train, test, **self.training_parameters
308 )
310 for i in history:
311 self.history.append(history)
313 # Basic_predict function
315 def basic_fit(
316 self,
317 Inputs,
318 Targets,
319 train=None,
320 test=None,
321 epochs=[1000, 1000],
322 b_s=[100, 20],
323 l_r=[0.01, 0.005],
324 sample_w=None,
325 verbose=1,
326 list_loss=["mse"],
327 metrics=None,
328 generator=None,
329 steps_per_epoch=None,
330 shuffle=True,
331 callbacks="default",
332 validation_freq=1,
333 param_loss=None,
334 test_batch_size=None,
335 **kwargs
336 ):
338 # Training function
339 history = []
340 list_history = []
342 if self.random_state is not None:
343 set_global_determinism(seed=self.random_state)
345 if generator is None:
346 generator = self.training_parameters["generator"]
348 if test_batch_size is None:
349 test_batch_size = self.training_parameters["test_batch_size"]
351 if train is None:
352 last_val = False
353 if hasattr(self, "model_parameters") and (
354 "size_window" in self.model_parameters.keys()
355 ):
356 last_val = True
358 train, test = generate_train_test(
359 len_=len(Targets),
360 train_ratio=self.train_ratio,
361 last_val=last_val,
362 random_state=self.random_state,
363 )
365 if test is None:
366 test = np.invert(train)
368 list_history = []
370 if not (hasattr(self, "scaler")):
371 _ = self.factory(Inputs, Targets, only_fit_scaler=True)
373 n_model = 1
374 if self.type_output == "Deep_ensemble":
375 n_model = self.n_ech
376 list_sampletoremove = generate_K_fold_removing_index(
377 n_model,
378 k_fold=self.k_fold,
379 train=train,
380 data_drop=self.data_drop,
381 random_state=self.random_state,
382 )
384 for n_model in range(n_model):
385 train_ = np.copy(train)
386 test_ = np.copy(test)
388 # Deep_ensemble : Submodel dataset differentiation if kfold activated
389 if self.type_output == "Deep_ensemble":
390 train_[list_sampletoremove[n_model]] = False
391 test_[list_sampletoremove[n_model]] = True
393 for n, loss in enumerate(list_loss):
394 for i, (batch_size, learning_rate) in enumerate(zip(b_s, l_r)):
396 loss_ = self.build_loss(loss, param_loss[n])
397 metrics = self.build_metrics(metrics)
399 if self.type_output == "Deep_ensemble":
400 if (self.snapshot) & (n_model > 0):
401 self.model[n_model] = tf.keras.clone_model(self.model[0])
403 current_model = self.model[n_model]
405 else:
406 current_model = self.model
408 current_model.compile(
409 optimizer=tf.keras.optimizers.experimental.Nadam(
410 learning_rate=learning_rate
411 ),
412 loss=loss_,
413 metrics=metrics,
414 )
416 (
417 In_,
418 Tar_,
419 validation_data_,
420 validation_steps,
421 steps_per_epoch,
422 batch_size,
423 ) = self.dataset_generator(
424 Inputs=apply_mask(Inputs, train_),
425 Targets=apply_mask(Targets, train_),
426 validation_data=(
427 apply_mask(Inputs, test_),
428 apply_mask(Targets, test_),
429 ),
430 batch_size=batch_size,
431 generator=generator,
432 shuffle=shuffle,
433 test_batch_size=test_batch_size,
434 )
436 if callbacks == "default":
437 callbacks = uqloss.default_callbacks()
439 history = current_model.fit(
440 x=In_,
441 y=Tar_,
442 validation_data=validation_data_,
443 epochs=epochs[i],
444 steps_per_epoch=steps_per_epoch,
445 validation_steps=validation_steps,
446 batch_size=batch_size,
447 sample_weight=sample_w,
448 shuffle=shuffle,
449 callbacks=callbacks,
450 validation_freq=validation_freq,
451 verbose=verbose,
452 )
454 current_model.compile()
456 list_history.append(history)
458 return list_history
460 def dataset_generator(
461 self,
462 Inputs,
463 Targets,
464 validation_data,
465 batch_size,
466 shuffle,
467 generator,
468 test_batch_size=None,
469 ):
470 """Hold case with or without data generator
472 Args:
473 Inputs (_type_): _description_
474 Targets (_type_): _description_
475 validation_data (_type_): _description_
476 batch (_type_): _description_
477 shuffle (_type_): _description_
478 generator (_type_): _description_
480 Returns:
481 _type_: _description_
482 """
483 if generator:
484 In_ = self.Build_generator(
485 Inputs, Targets, batch=batch_size, shuffle=shuffle, train=True
486 )
487 Tar_ = None
489 if test_batch_size is None:
490 test_batch_size = self.training_parameters["test_batch_size"]
492 validation_data_ = self.Build_generator(
493 validation_data[0],
494 validation_data[1],
495 batch=test_batch_size,
496 shuffle=False,
497 train=True,
498 )
500 steps_per_epoch = In_.__len__()
501 validation_steps = validation_data_.__len__()
502 batch_size = None
503 else:
504 In_ = Inputs
505 Tar_ = Targets
506 validation_data_ = validation_data
507 validation_steps = None
508 steps_per_epoch = None
509 return (
510 In_,
511 Tar_,
512 validation_data_,
513 validation_steps,
514 steps_per_epoch,
515 batch_size,
516 )
518 def predict(self, X, type_output=None, generator=None, **kwargs):
519 if type_output is None:
520 type_output = self.type_output
522 pred, UQ = self.basic_predict(
523 X, n_ech=self.n_ech, type_output=type_output, generator=generator, **kwargs
524 )
526 if self.rescale:
527 _, pred = self._format(None, pred, type_transform="inverse_transform")
528 _, UQ = self._format(
529 None, UQ, type_transform="inverse_transform", mode_UQ=True
530 )
532 return (pred, UQ)
534 def basic_predict(
535 self,
536 Inputs,
537 n_ech=6,
538 type_output="MC_Dropout",
539 generator=None,
540 test_batch_size=None,
541 **kwarg
542 ):
543 # Variational prediction + variance estimation for step T+1 et T+4(lag)
544 if generator is None:
545 generator = self.training_parameters["generator"]
547 if self.random_state is not None:
548 set_global_determinism(seed=self.random_state)
550 if generator:
551 if test_batch_size is None:
552 test_batch_size = self.training_parameters["test_batch_size"]
553 Inputs = self.Build_generator(
554 Inputs, None, batch=test_batch_size, shuffle=False, train=False
555 )
557 if type_output in ["MC_Dropout", "MC_Dropout_no_PNN"]:
558 pred, UQ = Drawn_based_prediction(
559 Inputs,
560 self.model,
561 n_ech,
562 ddof=self.ddof,
563 generator=generator,
564 type_output=type_output,
565 )
567 elif type_output == "Deep_ensemble":
568 pred, UQ = Ensemble_based_prediction(
569 Inputs,
570 self.model,
571 ddof=self.ddof,
572 generator=generator,
573 type_output=type_output,
574 )
576 elif type_output in ["EDL", "PNN", "None", None]:
577 pred, UQ = Deterministic_prediction(
578 Inputs,
579 self.model,
580 ddof=self.ddof,
581 generator=generator,
582 type_output=type_output,
583 )
585 else:
586 raise Exception(
587 "Unknown type_output : choose 'MC_Dropout' or 'Deep_esemble' or 'EDL' or 'Non' or None"
588 )
590 return (pred, UQ)
592 def Build_generator(self, X, y, batch=32, shuffle=True, train=True):
593 return default_Generator(X, y, self, batch=batch, shuffle=shuffle, train=train)
596def Drawn_based_prediction(
597 Inputs, model, n_ech, ddof, generator=False, type_output="MC_Dropout"
598):
599 """Prediction (mu,sigma) of Inputs using Drawn_based UQ-paragim (Ex : MC_dropout)
601 Args:
602 model (tf.model): neural network
603 n_ech (n_draw): number of dropout drawn
604 Inputs (_type_): Inputs of model
605 ddof (_type_): ddof
606 generator (bool, optional): specify if Inputs is generator or not. Defaults to False.
608 Returns:
609 _type_: _description_
610 """
612 if generator:
613 pred = []
614 var_a = []
615 var_e = []
616 for Inputs_gen, _ in Inputs: # by batch do n inf and aggreagate results
617 output = []
618 for i in range(n_ech):
619 output.append(model.predict(Inputs_gen))
621 if type_output == "MC_Dropout_no_PNN":
622 pred_ = np.array(output)
623 var_a.append(0 * pred_.mean(axis=0))
625 if type_output == "MC_Dropout":
626 pred_, logvar = np.split(np.array(output), 2, -1)
627 var_a.append(np.exp(logvar).mean(axis=0))
629 pred.append(pred_.mean(axis=0))
630 var_e.append(pred_.var(axis=0))
632 pred = np.concatenate(pred, axis=0)
633 var_a = np.concatenate(var_a, axis=0)
634 var_e = np.concatenate(var_e, axis=0)
635 UQ = np.concatenate([var_a[None, :], var_e[None, :]], axis=0)
636 else:
637 output = []
638 for i in range(n_ech):
639 output.append(model.predict(Inputs))
641 if type_output == "MC_Dropout_no_PNN":
642 pred_ = np.array(output)
643 var_a = 0 * pred_.mean(axis=0)
645 if type_output == "MC_Dropout":
646 pred_, logvar = np.split(np.array(output), 2, -1)
647 var_a = np.exp(logvar).mean(axis=0)
648 var_e = np.var(pred_, axis=0, ddof=ddof)
649 pred = pred_.mean(axis=0)
650 UQ = np.concatenate([var_a[None, :], var_e[None, :]], axis=0)
651 return (pred, UQ)
654def Deterministic_prediction(Inputs, model, ddof, generator=False, type_output=None):
655 """Prediction (mu,sigma) of Inputs using Deterministic UQ-paragim (Ex : EDL)
657 Args:
658 model (tf.model): neural network
659 n_ech (n_draw): number of dropout drawn
660 Inputs (_type_): Inputs of model
661 ddof (_type_): ddof
662 generator (bool, optional): specify if Inputs is generator or not. Defaults to False.
663 type_output : type_output (EDL)
665 Returns:
666 _type_: _description_
667 """
669 if generator:
670 output = []
671 for Inputs_gen, _ in Inputs: # by batch do n inf and aggreagate results
672 output.append(model.predict(Inputs_gen))
673 output = np.concatenate(output, axis=0)
675 else:
676 output = model.predict(Inputs)
678 if type_output == "EDL":
679 gamma, vu, alpha, beta = np.split(output, 4, -1)
680 alpha = alpha + 10e-6
681 pred = gamma
682 var_A = beta / (alpha - 1)
683 # WARNING sqrt or not sqrt ?
684 var_E = beta / (vu * (alpha - 1))
685 if (var_E == np.inf).sum() > 0:
686 print("Warning inf values in var_E replace by s-min")
687 if (var_A == np.inf).sum() > 0:
688 print("Warning inf values in var_E replace by s-min")
689 var_E[var_E == np.inf] = 0
690 var_A[var_A == np.inf] = 0
692 elif type_output == "PNN":
693 pred, logvar = np.split(output, 2, -1)
694 var_A = np.exp(logvar)
695 var_E = logvar * 0
697 else:
698 pred = output
699 var_A = 0 * pred
700 var_E = 0 * pred
702 UQ = np.concatenate([var_E[None, :], var_A[None, :]], axis=0)
703 return (pred, UQ)
706def Ensemble_based_prediction(Inputs, models, ddof, generator=False, type_output=None):
707 """Prediction (mu,sigma) of Inputs using Ensemble_based UQ-paradign
709 Args:
710 model (tf.model): neural network
711 n_ech (n_draw): number of dropout drawn
712 Inputs (_type_): Inputs of model
713 ddof (_type_): ddof
714 generator (bool, optional): specify if Inputs is generator or not. Defaults to False.
715 type_output : type_output (curently useless)
717 Returns:
718 _type_: _description_
719 """
721 if generator:
722 pred = []
723 var_a = []
724 var_e = []
725 for Inputs_gen, _ in Inputs: # by batch do n inf and aggreagate results
726 output = []
727 for submodel in models:
728 output.append(submodel.predict(Inputs_gen))
730 pred_, logvar = np.split(np.array(output), 2, -1)
731 var_a.append(np.exp(logvar).mean(axis=0))
732 var_e.append(pred_.var(axis=0, ddof=ddof))
733 pred.append(pred_.mean(axis=0))
735 pred = np.concatenate(pred, axis=0)
736 var_a = np.concatenate(var_a, axis=0)
737 var_e = np.concatenate(var_e, axis=0)
738 else:
739 output = []
740 for submodel in models:
741 output.append(submodel.predict(Inputs))
742 pred, logvar = np.split(np.array(output), 2, -1)
743 var_a = np.exp(logvar).mean(axis=0)
744 var_e = np.var(pred, axis=0, ddof=ddof)
745 pred = pred.mean(axis=0)
746 UQ = np.concatenate([var_a[None, :], var_e[None, :]], axis=0)
747 return (pred, UQ)
750def get_training_parameters(
751 epochs=[100, 100],
752 b_s=[64, 32],
753 l_r=[0.005, 0.001],
754 list_loss=None,
755 metrics=None,
756 param_loss=None,
757 type_output=None,
758 generator=False,
759 shuffle=True,
760 verbose=1,
761 sample_w=None,
762 callbacks="default",
763 **kwargs
764):
765 if list_loss is None:
766 if type_output is None:
767 list_loss = ["MSE"]
768 metrics = ["MSE"]
770 if type_output == "MC_Dropout":
771 list_loss = ["MSE", "BNN"]
772 metrics = ["MSE", "BNN"]
773 param_loss = [2, 0.9]
775 if type_output == "Deep_ensemble":
776 list_loss = ["MSE", "BNN"]
777 metrics = ["MSE", "BNN"]
778 param_loss = [2, 0.9]
780 if type_output == "EDL":
781 list_loss = ["MSE", "EDL", "EDL"]
782 metrics = ["MSE", "BNN"]
783 param_loss = [4, 1e-2, 10e-2]
785 dict_params = {
786 "epochs": epochs,
787 "b_s": b_s,
788 "l_r": l_r,
789 "sample_w": sample_w,
790 "list_loss": list_loss,
791 "metrics": metrics,
792 "param_loss": param_loss,
793 "generator": generator,
794 "shuffle": shuffle,
795 "verbose": verbose,
796 "callbacks": callbacks,
797 }
799 for key_arg in kwargs.keys():
800 dict_params[key_arg] = kwargs[key_arg]
801 return dict_params
804def get_params_dict(
805 dim_in,
806 dim_out=1,
807 layers_size=[200, 150, 100],
808 regularizer_=(0.0001, 0.0001),
809 dp=None,
810 name="MLP_UQ",
811 type_output="MC_Dropout",
812 **kwargs
813):
815 dict_params = {
816 "dim_in": dim_in,
817 "dim_out": dim_out,
818 "layers_size": layers_size,
819 "regularizer_": regularizer_,
820 "name": name,
821 "n_ech": 5,
822 "dp": dp,
823 "type_output": type_output,
824 "logvar_min": np.log(0.00005),
825 }
826 if type_output == "MC_Dropout":
827 if dp is None:
828 dict_params["dp"] = 0.15
830 if type_output == "Deep_ensemble":
831 dict_params["n_ech"] = 5
832 dict_params["k_fold"] = 8
833 if dp is None:
834 dict_params["dp"] = 0.02
836 if type_output == "EDL":
837 if dp is None:
838 dict_params["dp"] = 0.02
840 for key_arg in kwargs.keys():
841 dict_params[key_arg] = kwargs[key_arg]
843 return dict_params
846class default_Generator(tf.keras.utils.Sequence):
847 def __init__(
848 self, X, y, metamodel, batch=64, shuffle=True, train=True, random_state=None
849 ):
850 self.X = X
851 self.y = y
852 self.len_ = len(y)
853 self.train = train
854 self.random_state = random_state
855 self.shuffle = shuffle
856 self.batch = batch
858 # self.scaler = metamodel.scaler
859 self.factory = metamodel.factory
860 self._format = metamodel._format
861 self.rescale = metamodel.rescale
863 if shuffle:
864 self.indices = np.arange(self.len_)
865 np.random.seed(self.random_state)
866 np.random.shuffle(self.indices)
868 def load(self, idx):
869 idx = idx * self.batch
871 seuil_min = max(0, idx - 1)
872 seuil_max = min(idx, self.len_ - 1)
873 Inputs = self.X[seuil_min:seuil_max]
874 Targets = self.y[seuil_min:seuil_max]
875 return (Inputs, Targets)
877 def __len__(self):
878 return self.len_
880 def __getitem__(self, idx):
881 if self.shuffle:
882 idx = self.indices[idx]
884 x, y = self.load(idx)
885 Inputs, Ouputs, _ = self.factory(x, y)
887 len_ = len(Inputs)
888 selection = np.zeros(len(Inputs[0])) == 1
890 selection[max(0, idx * self.batch) : min(len_, (idx + 1) * self.batch)] = True
892 Inputs = apply_mask(Inputs, selection)
893 Ouputs = apply_mask(Ouputs, selection)
894 return Inputs, Ouputs
896 # shuffles the dataset at the end of each epoch
897 def on_epoch_end(self):
898 if self.shuffle:
899 np.random.seed(self.random_state)
900 np.random.shuffle(self.indices)
903def generate_K_fold_removing_index(
904 n_model, k_fold, train, data_drop, random_state=None
905):
906 """Generate liste of idx to remove for k_fold deep ensemble procedure
908 Args:
909 n_model (_type_): Number of models
910 k_fold (_type_): Number of fold
911 train (_type_): train_flag_idx
912 data_drop (_type_): % of data drop
913 random_state : handle experimental random using seed
914 Returns:
915 _type_: list_sampletoremove idx of sample to remove of train for each submodel
916 """
917 list_sampletoremove = []
918 if k_fold is not None:
919 if k_fold < n_model:
920 print("Warning kfold lesser than model number")
921 # Drop data using Kfold + random drop ratio to add variability to deep ensemble
922 for n_fold, (keep, removed) in enumerate(
923 KFold(k_fold, shuffle=True, random_state=random_state).split(train)
924 ):
925 if data_drop > 0:
926 np.random.seed(add_random_state(random_state, n_fold))
927 sampletoremove = np.random.choice(
928 keep, int(len(keep) * data_drop), replace=False
929 )
930 sampletoremove = sorted(np.concatenate([removed, sampletoremove]))
931 list_sampletoremove.append(sampletoremove)
932 else:
933 list_sampletoremove.append([])
934 else:
935 list_sampletoremove = [[] for i in range(n_model)]
936 if data_drop > 0:
937 for n, i in enumerate(list_sampletoremove):
938 np.random.seed(add_random_state(random_state, n_fold))
939 sampletoremove = np.random.choice(
940 np.arange(len(train)),
941 int(len(train) * data_drop),
942 replace=False,
943 )
944 list_sampletoremove[n] = sampletoremove
945 return list_sampletoremove
948def generate_train_test(len_, train_ratio=0.92, last_val=True, random_state=None):
949 if last_val:
950 train = np.arange(len_) < train_ratio * len_
951 else:
952 np.random.seed(random_state)
953 train = np.random.rand(len_) < train_ratio
954 test = np.invert(train)
955 return (train, test)