Coverage for uqmodels/utils.py: 31% (416 statements)


import copy
import math
import sys
from copy import deepcopy
from typing import Iterable

import numpy as np
import scipy
import tensorflow as tf
from joblib import Parallel, delayed
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler

EPSILON = sys.float_info.min  # small value to avoid underflow


def identity(*args):
    return args


def add_random_state(random_state, values):
    """Handle addition with a possibly None random_state.

    Args:
        random_state (int or None): Random state
        values (int): Value to add

    Returns:
        int or None: random_state + values, or None if random_state is None
    """
    if random_state is None:
        return None
    else:
        return random_state + values


def generate_random_state(random_state=None):
    """Draw a random number if random_state is None, else return random_state.

    Args:
        random_state (int or None, optional): random_state. Defaults to None.

    Returns:
        int: random_state
    """
    if random_state is None:
        random_state = np.random.randint(0, 10000000)
    return random_state


def norm_signal(y):
    """Standardize a signal to zero mean and unit variance."""
    return (y - y.mean()) / y.std()


def cum_sum_consecutive_zero(array):
    """Count consecutive zeros up to each position.

    Example: 1 0 1 0 0 0 -> 0 1 0 1 2 3

    Args:
        array (np.array): array containing runs of zeros

    Returns:
        np.array: per-position count of consecutive zeros
    """
    len_ = len(array)
    inds = np.nonzero(array)[0]
    consecutive_count = np.zeros(len_)

    # Guard: an all-zero input has no nonzero index to anchor on.
    if len(inds) == 0:
        return np.arange(1, len_ + 1, dtype=float)

    if not inds[0] == 0:
        consecutive_count[: inds[0]] += 1
        inds = np.concatenate([[0], inds])

    flag = np.ones(len_)
    cpt = 1
    flag[inds] = 0
    while flag.sum() > 0:
        inds = inds + 1
        if inds[-1] > len_ - 1:
            inds = inds[:-1]
        inds = inds[flag[inds] != 0]
        consecutive_count[inds] += cpt
        flag[inds] = 0
        cpt += 1
    return consecutive_count
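
# Illustrative usage (added note, not part of the original module): the
# function reproduces its docstring example.
#
#   cum_sum_consecutive_zero(np.array([1, 0, 1, 0, 0, 0]))
#   # -> array([0., 1., 0., 1., 2., 3.])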


def threshold(array, min_val=None, max_val=None):
    """Threshold an array with min_val as lower bound and max_val as upper bound.

    Handles multidimensional thresholds on the last dimension.

    Args:
        array (np.array): ND array to threshold
        min_val (scalar or array, optional): lower bound(s)
        max_val (scalar or array, optional): upper bound(s)
    """
    if min_val is not None:
        array = np.maximum(array, min_val)
    if max_val is not None:
        array = np.minimum(array, max_val)
    return array
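
# Illustrative usage (added note): clip values below 0 and above 1.
#
#   threshold(np.array([-0.5, 0.3, 1.7]), min_val=0.0, max_val=1.0)
#   # -> [0.0, 0.3, 1.0]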


def corr_matrix_array(m, a):
    """Correlate each column of a matrix with a 1D array.

    Parameters
    ----------
    m: numpy array (n_samples, n_features)
    a: numpy array (n_samples,)

    Returns
    -------
    c: numpy array
        correlation coefficients of a against the columns of m
    """
    a = a.reshape(-1, 1)

    mean_t = np.mean(m, axis=0)
    std_t = np.std(m, axis=0)

    mean_i = np.mean(a, axis=0)
    std_i = np.std(a, axis=0)

    mean_xy = np.mean(m * a, axis=0)

    c = (mean_xy - mean_i * mean_t) / (std_i * std_t)
    return c
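
# Illustrative usage (added note): the first column is perfectly correlated
# with a, the second perfectly anti-correlated.
#
#   m = np.array([[1.0, 3.0], [2.0, 2.0], [3.0, 1.0]])
#   corr_matrix_array(m, np.array([1.0, 2.0, 3.0]))
#   # -> [1.0, -1.0]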


def mask_corr_feature_target(X, y, v_seuil=0.05):
    """
    Return a boolean array indicating which features show a max absolute
    correlation with any target column above the threshold.

    Parameters
    ----------
    X : ndarray (n_samples, n_features)
    y : ndarray (n_samples, n_targets)
    v_seuil : float
    """
    v_corr = np.abs(corr_matrix_array(X, y[:, 0]))
    for i in np.arange(y.shape[1] - 1):
        v_corr = np.maximum(v_corr, np.abs(corr_matrix_array(X, y[:, i + 1])))
    return v_corr > v_seuil


def coefficients_spreaded(X):
    """Return a roughly log-spaced grid of coefficients strictly below X."""
    coefficients = np.array(
        [1, 2, 3, 5, 8, 10, 15, 20, 25, 33, 50, 100, 250, 500,
         1000, 2500, 5000, 10000, 25000, 50000, 100000]
    )
    mask = coefficients < X
    return coefficients[mask]


def Extract_dict(dictionaire, str_keys):
    """Extract the values of str_keys from a dict; unwrap if a single key is given."""
    list_extract = []
    for str_name in str_keys:
        list_extract.append(dictionaire[str_name])

    if len(list_extract) == 1:
        list_extract = list_extract[0]
    return list_extract


def sizeof_fmt(num, suffix="B"):
    """by Fred Cirera, https://stackoverflow.com/a/1094933/1870254, modified"""
    for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
        if abs(num) < 1024.0:
            return f"{num:3.1f} {unit}{suffix}"
        num /= 1024.0
    return f"{num:.1f} Yi{suffix}"
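
# Illustrative usage (added note): human-readable byte sizes.
#
#   sizeof_fmt(1536)   # -> "1.5 KiB"
#   sizeof_fmt(10**9)  # -> "953.7 MiB"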


def track_memory(string):
    """Print the 10 largest global objects, to help track memory leakage."""
    print("Tracking_memory : " + string)
    for name, size in sorted(
        ((name, sys.getsizeof(value)) for name, value in list(globals().items())),
        key=lambda x: -x[1],
    )[:10]:
        print(f"{name:>30}: {sizeof_fmt(size):>8}")


def propagate(bool_array, n_prop=1, inv=False, sym=False):
    # Propagate True values of a boolean flag to the previous and/or following index
    for _ in range(n_prop):
        if inv or sym:
            bool_array = bool_array | np.roll(bool_array, -1)
        # propagate forward as well (both directions when sym=True)
        if (not inv) or sym:
            bool_array = bool_array | np.roll(bool_array, 1)
    return bool_array
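
# Illustrative usage (added note): with sym=True a single True spreads to
# both neighbours.
#
#   propagate(np.array([False, True, False, False]), sym=True)
#   # -> array([ True,  True,  True, False])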


def expand_flag(flag):
    # Propagate True values of a boolean flag to the previous and following index
    return (np.roll(flag, 1) + np.roll(flag, 0) + np.roll(flag, -1)) > 0


def get_coeff(alpha):
    """Return the standard normal quantile of order alpha."""
    return scipy.stats.norm.ppf(alpha, 0, 1)


def flatten(y):
    """Squeeze an (n, 1) array to shape (n,); pass anything else through."""
    if y is not None:
        if (len(y.shape) == 2) & (y.shape[1] == 1):
            y = y[:, 0]
    return y


# Folding functions


def get_fold_nstep(size_window, size_subseq, padding):
    # Compute the number of steps according to window size, subsequence size and padding.
    return int(np.floor((size_window - size_subseq) / (padding))) + 1
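
# Illustrative usage (added note): a window of 100 split into subsequences
# of 20 with padding 10 yields floor(80 / 10) + 1 = 9 steps.
#
#   get_fold_nstep(100, 20, 10)  # -> 9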


# TODO update: handle padding to limit train redundancy
def stack_and_roll(array, horizon, lag=0, seq_idx=None, step=1):
    """Stack an array `horizon` times and roll it into temporal stairs.

    Args:
        array (np.array): 2D array to stack and roll
        horizon (int): stacking depth
        lag (int, optional): lag offset. Defaults to 0.
        seq_idx (np.array, optional): sequence ids, to roll block by block. Defaults to None.
        step (int, optional): subsampling step. Defaults to 1.

    Returns:
        np.array: (n+1)-dimensional array whose kth slice is rolled k times
    """
    shape = (1, horizon, 1)
    new_array = np.tile(array[:, None], shape)

    if seq_idx is None:
        for i in np.arange(horizon):
            lag_current = horizon - lag - i - 1
            new_array[:, i] = np.roll(new_array[:, i], lag_current, axis=0)

        # Erase procedure: zero out values that wrapped around
        for i in np.arange(horizon):
            lag_current = horizon - lag - i - 1
            if lag_current > 0:
                new_array[i, :lag_current] = 0

    # By blocks:
    else:
        _, idx = np.unique(seq_idx, return_index=True)
        for j in seq_idx[np.sort(idx)]:
            flag = seq_idx == j
            list_idx = np.nonzero(flag)[0]
            for i in np.arange(horizon):
                lag_current = horizon - lag - i - 1
                new_array[flag, i] = np.roll(new_array[flag, i], lag_current, axis=0)
            # To optimize: get first non-zero values.
            for i in np.arange(horizon):
                lag_current = horizon - lag - i - 1
                new_array[list_idx[i], :lag_current] = 0
    if step > 1:
        mask = (np.arange(len(array)) % step) == step - 1
        new_array = new_array[mask]
    return new_array
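
# Illustrative usage (added note): with horizon=3, each row t collects
# [y_{t-2}, y_{t-1}, y_t], zero-padded at the start of the series.
#
#   stack_and_roll(np.arange(5.0).reshape(5, 1), horizon=3)[2]
#   # -> [[0.], [1.], [2.]]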


def stack_and_roll_layer(inputs, size_window, size_subseq, padding, name=""):
    """Stack sliding subsequences of `inputs` into a new axis (TensorFlow implementation)."""
    n_step = get_fold_nstep(size_window, size_subseq, padding)
    # Former numpy/Keras implementation, kept for reference:
    # slide_tensor = []
    # for i in range(n_step):
    #     slide_tensor.append(
    #         inputs[:, (i * padding) : (i * padding) + size_subseq, :][:, None]
    #     )
    # return Lambda(lambda x: K.concatenate(x, axis=1), name=name + "_rollstack")(
    #     slide_tensor
    # )
    x = tf.map_fn(
        lambda i: inputs[:, (i * padding): (i * padding) + size_subseq, :],
        tf.range(n_step),
        fn_output_signature=tf.float32,
    )
    x = tf.transpose(x, [1, 0, 2, 3])
    return x


def apply_middledim_reduction(ndarray, reduc_filter=None, roll=0):
    """Reduce the middle dimension using a weighted mean with reduc_filter weights, or do nothing.

    Args:
        ndarray (np.array): object to reduce
        reduc_filter (np.array): weights for the reduction
        roll (int, optional): if nonzero, roll each slice j by j*roll before summing

    Returns:
        reduced_ndarray: reduced object
    """

    ndarray = deepcopy(ndarray)
    if reduc_filter is None:
        return ndarray

    reduc_filter = np.array(reduc_filter)

    dim_g = ndarray.shape[-1]
    list_reduced_ndarray_by_dim = []
    # For each dimension of the score
    for i in range(dim_g):
        # Incompatibility test:
        if len(reduc_filter) != ndarray.shape[1]:
            print(
                "error: expected size of reduc_filter:",
                str(ndarray.shape[1]),
                "current size:",
                str(len(reduc_filter)),
            )
            break

        filt = reduc_filter / reduc_filter.sum()
        ndarray[:, :, i] = ndarray[:, :, i] * filt
        if roll:
            # copy to avoid accumulating into a view of ndarray
            reduced_ndarray = ndarray[:, 0, i].copy()
            for j in range(ndarray.shape[1] - 1):
                reduced_ndarray += np.roll(ndarray[:, j + 1, i], (j + 1) * roll, axis=0)
        else:
            # weights already applied above
            reduced_ndarray = ndarray[:, :, i].sum(axis=1)

        list_reduced_ndarray_by_dim.append(reduced_ndarray[:, None])
    reduced_ndarray = np.concatenate(list_reduced_ndarray_by_dim, axis=1)
    return reduced_ndarray


def apply_mask(list_or_array_or_none, mask, axis=0, mode="bool_array"):
    """Select a subpart of an array/list of arrays/tuple using a mask and np.take.

    If list_or_array_or_none is an array, apply the selection directly to it;
    if it is a tuple or list of arrays, apply it to each array in the structure.

    Args:
        list_or_array_or_none (list, tuple, np.array or None): data to subselect
        mask (np.array): mask as a boolean array or an array of indices
        axis (int, optional): axis of each sub-array to select along. Defaults to 0.
        mode (str, optional): if 'bool_array', turn the boolean mask into an index array.
            Defaults to 'bool_array'.

    Returns:
        (list, tuple, np.array or None): subselected list of arrays or ND array
    """

    if mode == "bool_array":
        indices = np.arange(len(mask))[mask]
    else:
        # mask is already an array of indices
        indices = mask

    if isinstance(list_or_array_or_none, (list, tuple)):
        return [np.take(array, indices, axis=axis) for array in list_or_array_or_none]
    if list_or_array_or_none is None:
        return None
    else:
        return np.take(list_or_array_or_none, indices, axis=axis)
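
# Illustrative usage (added note): boolean masking of a single array.
#
#   apply_mask(np.array([10, 20, 30, 40]), np.array([True, False, True, False]))
#   # -> array([10, 30])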


def base_cos_freq(array, freq=[2]):
    """Transform 1D modulo features (e.g. values in [1, N]) into cyclic (cos, sin) features for the given frequencies."""
    features = []
    for i in freq:
        features.append(np.cos(i * math.pi * array))
        features.append(np.sin(i * math.pi * array))
    return np.concatenate(features).reshape(len(freq) * 2, -1).T
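
# Illustrative usage (added note): with freq=[2], each value x maps to
# (cos(2*pi*x), sin(2*pi*x)), giving an output of shape (n, 2).
#
#   base_cos_freq(np.array([0.0, 0.25, 0.5]), freq=[2])
#   # -> approximately [[1, 0], [0, 1], [-1, 0]]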


# Preprocessing functions


def apply_conv(score, filt=None, reduc_filter=None):
    """Naively apply a 1D convolution to each dimension of score, using filt as the filter.

    Args:
        score (np.array): score to convolve, dimension by dimension
        filt (np.array): filter to apply

    Returns:
        score: convolved score
    """
    if filt is None:
        return score

    if len(score.shape) == 3:
        # For each dimension of the score
        for t in range(score.shape[1]):
            for i in range(score.shape[2]):
                score[:, t, i] = np.convolve(score[:, t, i], filt, mode="same")
    else:
        dim_g = score.shape[-1]
        # For each dimension of the score
        for i in range(dim_g):
            score[:, i] = np.convolve(score[:, i], filt, mode="same")
    return score


def cut(values, cut_min, cut_max):
    """Apply minimal and maximal percentile thresholds.

    Args:
        values (np.array): values to cut
        cut_min (float): percentile of the minimal cut threshold
        cut_max (float): percentile of the maximal cut threshold

    Returns:
        np.array: values clipped to the two percentiles
    """

    if values is None:
        return None

    vpmin = np.quantile(values, cut_min, axis=0)
    vpmax = np.quantile(values, cut_max, axis=0)
    values = np.minimum(values, vpmax)
    values = np.maximum(values, vpmin)
    return values
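
# Illustrative usage (added note; `values` is a hypothetical sample array):
# winsorize at the 1st and 99th percentiles, column by column.
#
#   cut(values, cut_min=0.01, cut_max=0.99)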


def format_data(self, X, y, fit=False, mode=None, flag_inverse=False):
    """Feature and target formatting (rescaling and inverse rescaling)."""
    if self.rescale:
        flag_reshape = False
        if y is not None:
            if len(y.shape) == 1:
                flag_reshape = True
                y = y[:, None]

        if fit:
            scalerX = StandardScaler(with_mean=True, with_std=True)
            X = scalerX.fit_transform(X)
            scalerY = StandardScaler(with_mean=True, with_std=True)
            y = scalerY.fit_transform(y)
            self.scaler = [scalerX, scalerY]

        elif not flag_inverse:
            if X is not None:
                X = self.scaler[0].transform(X)
            if y is not None:
                y = self.scaler[1].transform(y)
        else:
            X_transformer = self.scaler[0].inverse_transform
            Y_transformer = self.scaler[1].inverse_transform
            if X is not None and not len(X) == 0:
                X = X_transformer(X)

            if y is not None and not len(y) == 0:
                sigma = np.sqrt(self.scaler[1].var_)
                if (mode == "sigma") | (mode == "var") | (mode == "var_A&E"):
                    y = y * sigma

                elif mode == "2sigma":
                    y_reshape = np.moveaxis(y, -1, 0)
                    if len(y.shape) == 3:
                        y = np.concatenate(
                            [
                                np.expand_dims(i, 0)
                                for i in [y_reshape[0] * sigma, y_reshape[1] * sigma]
                            ],
                            axis=0,
                        )
                    else:
                        y = np.concatenate(
                            [
                                y_reshape[0] * sigma,
                                y_reshape[1] * sigma,
                            ],
                            axis=-1,
                        )
                elif mode == "quantile":
                    y_bot = Y_transformer(y[:, 0])
                    y_top = Y_transformer(y[:, 1])
                    y = np.concatenate([y_bot, y_top], axis=1)

                else:
                    y = Y_transformer(y)

        if flag_reshape:
            y = y[:, 0]
    return (X, y)


# PIs basic computation functionalities


def compute_born(y_pred, sigma, alpha, mode="sigma"):
    """Compute y_upper and y_lower boundaries from a Gaussian hypothesis (sigma or 2sigma).

    Args:
        y_pred (array): mean prediction
        sigma (array): variance estimation
        alpha (float): miscoverage ratio
        mode (str): distribution hypothesis
            (sigma: Gaussian residual hypothesis, 2sigma: Gaussian positive and negative residual hypothesis)

    Returns:
        (y_lower, y_upper): lower and upper boundaries of the predictive interval
    """
    # Case sigma
    if mode == "sigma":
        y_lower = y_pred + scipy.stats.norm.ppf((alpha / 2), 0, sigma)
        y_upper = y_pred + scipy.stats.norm.ppf((1 - (alpha / 2)), 0, sigma)
    # Case 2 sigma
    elif mode == "2sigma":
        sigma = np.moveaxis(sigma, -1, 0)
        y_lower = y_pred + scipy.stats.norm.ppf((alpha / 2), 0, sigma[0])
        y_upper = y_pred + scipy.stats.norm.ppf((1 - (alpha / 2)), 0, sigma[1])
    else:
        raise NotImplementedError("This mode is not yet implemented.")
    return y_lower, y_upper
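
# Illustrative usage (added note): with a standard normal residual and
# alpha=0.05, the bounds are the usual 95% Gaussian interval.
#
#   y_lo, y_hi = compute_born(np.zeros(3), np.ones(3), alpha=0.05)
#   # -> y_lo ~ -1.96, y_hi ~ +1.96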


# Gaussian mixture quantile estimation:
def mixture_quantile(pred, var_A, quantiles, n_jobs=5):
    def aux_mixture_quantile(pred, var_A, quantiles):
        list_q = []
        n_data = pred.shape[1]
        n_mixture = pred.shape[0]
        for n in range(n_data):
            mean_law = scipy.stats.norm(
                pred[:, n, 0].mean(), np.sqrt(var_A[:, n, 0].mean())
            )
            xmin = mean_law.ppf(0.0000001)
            xmax = mean_law.ppf(0.9999999)
            scale = np.arange(xmin, xmax, (xmax - xmin) / 300)
            Mixture_cdf = np.zeros(len(scale))
            for i in range(n_mixture):
                cur_law = scipy.stats.norm(pred[i, n, 0], np.sqrt(var_A[i, n, 0]))
                Mixture_cdf += cur_law.cdf(scale) / n_mixture
            q_val = []
            for q in quantiles:
                q_val.append(scale[np.abs(Mixture_cdf - q).argmin()])
            list_q.append(q_val)
        return np.array(list_q)

    list_q = []
    n_data = pred.shape[1]
    # Partition the data across the n_jobs parallel workers
    parallel_partition = np.array_split(np.arange(n_data), n_jobs)
    # Split inputs for the auxiliary parallel quantile extraction
    parallel_input = []
    for partition in parallel_partition:
        parallel_input.append((pred[:, partition], var_A[:, partition], quantiles))
    list_q = Parallel(n_jobs=n_jobs)(
        delayed(aux_mixture_quantile)(*inputs) for inputs in parallel_input
    )
    return np.concatenate(list_q, axis=0)


# Scikit-learn tuning function


def aux_tuning(
    model,
    X,
    Y,
    params=None,
    score="neg_mean_squared_error",
    n_esti=100,
    folds=4,
    random_state=None,
):
    """Random search with sequential k-fold (time-series) splitting.

    Args:
        model (scikit-learn estimator): estimator
        X ([type]): features
        Y ([type]): target
        params ([type], optional): parameter grid. Defaults to None.
        score (str, optional): score. Defaults to 'neg_mean_squared_error'.
        n_esti (int, optional): number of grid draws. Defaults to 100.
        folds (int, optional): number of sequential folds. Defaults to 4.
        random_state (int or None): handle experimental randomness using seeds.
    """
    if params is None:
        return model
    tscv = TimeSeriesSplit(n_splits=folds)
    random_search = RandomizedSearchCV(
        model,
        param_distributions=params,
        n_iter=n_esti,
        scoring=score,
        n_jobs=8,
        cv=tscv.split(X),
        verbose=1,
        random_state=random_state,
    )
    Y = np.squeeze(Y)
    random_search.fit(X, Y)
    return random_search.best_estimator_


def agg_list(list_: Iterable):
    try:
        return np.concatenate(list_, axis=0)
    except ValueError:
        return None


def agg_func(list_: Iterable):
    try:
        return np.mean(list_, axis=0)
    except TypeError:
        return None


class GenericCalibrator:
    def __init__(self, type_res="res", mode="symetric", name=None, alpha=0.1):
        """Generic calibrator implementing several calibration strategies.

        Args:
            type_res (str, optional): type of score
                "no_calib": no calibration
                "res": calibration based on mean residuals
                "w_res": calibration based on weighted mean residuals
                "cqr": calibration based on quantile residuals

            mode (str, optional):
                if "symetric", perform symmetric calibration;
                else perform calibration independently on positive and negative residuals.
            name (str, optional): calibrator name. Defaults to None.
            alpha (float, optional): miscoverage ratio. Defaults to 0.1.
        """
        super().__init__()
        self._residuals = None
        self.name = name
        if name is None:
            self.name = type_res + "_" + mode
        self.mode = mode
        self.alpha = alpha
        self.type_res = type_res

        if mode == "symetric":
            self.fcorr = 1
        else:
            self.fcorr_lower = 1
            self.fcorr_upper = 1

    def estimate(
        self, y_true, y_pred, y_pred_lower, y_pred_upper, sigma_pred, **kwargs
    ):
        flag_res_lower = np.zeros(y_true.shape)
        flag_res_upper = np.zeros(y_true.shape)

        if (self.type_res == "res") | (self.type_res == "w_res"):
            if self.type_res == "res":
                residuals = y_true - y_pred
                sigma_pred = y_true * 0 + 1

            if self.type_res == "w_res":
                # sigma_pred = np.maximum(y_pred_upper - y_pred_lower, EPSILON) / 2
                residuals = y_true - y_pred

            if len(residuals.shape) == 1:
                flag_res_lower = residuals <= 0
                flag_res_upper = residuals >= 0

            else:
                flag_res_lower = np.concatenate(
                    [
                        np.expand_dims(residuals[:, i] <= 0, -1)
                        for i in range(0, y_true.shape[1])
                    ],
                    axis=-1,
                )
                flag_res_upper = np.concatenate(
                    [
                        np.expand_dims(residuals[:, i] >= 0, -1)
                        for i in range(0, y_true.shape[1])
                    ],
                    axis=-1,
                )

            if y_pred.shape != sigma_pred.shape:
                sigma_pred = np.moveaxis(sigma_pred, -1, 0)
                residuals[flag_res_lower] = (
                    np.abs(residuals)[flag_res_lower] / (sigma_pred[0])[flag_res_lower]
                )
                residuals[flag_res_upper] = (
                    np.abs(residuals)[flag_res_upper] / (sigma_pred[1])[flag_res_upper]
                )
            else:
                residuals = np.abs(residuals) / (sigma_pred)

        elif self.type_res == "cqr":
            residuals = np.maximum(y_pred_lower - y_true, y_true - y_pred_upper)
            flag_res_lower = (y_pred_lower - y_true) >= (y_true - y_pred_upper)
            flag_res_upper = (y_pred_lower - y_true) <= (y_true - y_pred_upper)

        elif self.type_res == "no_calib":
            return

        else:
            print("Unknown type_res")
            return

        if self.mode == "symetric":
            self.fcorr = np.quantile(
                residuals, (1 - self.alpha) * (1 + 1 / len(residuals))
            )

        else:
            self.fcorr_lower = np.quantile(
                residuals[flag_res_lower],
                np.minimum((1 - self.alpha) * (1 + 1 / flag_res_lower.sum()), 1),
            )

            self.fcorr_upper = np.quantile(
                residuals[flag_res_upper],
                np.minimum((1 - self.alpha) * (1 + 1 / flag_res_upper.sum()), 1),
            )
        return

    def calibrate(self, y_pred, y_pred_lower, y_pred_upper, sigma_pred, **kwargs):
        if self.type_res == "res":
            sigma_pred = y_pred * 0 + 1

        if self.mode == "symetric":
            fcorr_lower = self.fcorr
            fcorr_upper = self.fcorr
        else:
            fcorr_lower = self.fcorr_lower
            fcorr_upper = self.fcorr_upper

        if self.type_res in ["res", "w_res", "no_calib"]:
            if y_pred.shape != sigma_pred.shape:
                sigma_pred = np.moveaxis(sigma_pred, -1, 0)
                y_pred_lower = y_pred - sigma_pred[0] * fcorr_lower
                y_pred_upper = y_pred + sigma_pred[1] * fcorr_upper
            else:
                y_pred_lower = y_pred - sigma_pred * fcorr_lower
                y_pred_upper = y_pred + sigma_pred * fcorr_upper

        elif self.type_res in ["cqr"]:
            y_pred_upper = y_pred_upper + fcorr_upper
            y_pred_lower = y_pred_lower - fcorr_lower
        else:
            print("Unknown type_res")
            return

        return y_pred_lower, y_pred_upper
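
# Illustrative usage sketch (added note; the array names are hypothetical):
# estimate the correction factor on a calibration split, then widen the test
# intervals. With type_res="res", the bound/sigma arguments are unused.
#
#   calib = GenericCalibrator(type_res="res", mode="symetric", alpha=0.1)
#   calib.estimate(y_cal, y_pred_cal, None, None, None)
#   lo, hi = calib.calibrate(y_pred_test, None, None, None)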


def dimensional_reduction(data, mode="umap", reducer=None, fit=True, **kwargs):
    """Project data with a given reducer, or build one (mode: umap, tsne or pca)."""
    if "n_components" not in kwargs.keys():
        kwargs["n_components"] = 3

    if reducer is None:
        if mode == "umap":
            if "n_neighbors" not in kwargs:
                kwargs["n_neighbors"] = 50
            if "min_dist" not in kwargs:
                kwargs["min_dist"] = 0.01
            # umap is an optional dependency; without it, no reducer is built here.
            # reducer = umap.UMAP(**kwargs)

        elif mode == "tsne":
            reducer = TSNE(**kwargs)

        elif mode == "pca":
            reducer = PCA(**kwargs)

    embedding = reducer.fit_transform(data)
    return (embedding, reducer)


# Moving-average functions for lag feature extraction


def fit_compute_lag(Y, lag=[1, 2, 3], delay=0):
    """Create lag features from a numerical array.

    Args:
        Y (float array): target to extract lag features from
        lag (list of int, optional): lags to use. Defaults to [1, 2, 3].
        delay (int, optional): delay before the first lag feature. Defaults to 0.
    """
    dim = Y.shape
    new_features_list = []
    new_features_name = []
    for i in np.array(lag) + delay:
        Y_cur = np.roll(Y, i, axis=0)
        if i > 0:
            Y_cur[0:i] = 0
        for g in range(dim[1]):
            new_features_list.append(Y_cur[:, g])
            new_features_name.append("lag_" + str(i) + "_dim:" + str(g))
    new_features_name = np.array(new_features_name)
    return (np.array(new_features_list).T, new_features_name)
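
# Illustrative usage (added note): for an (n, 1) target and lag=[1], the single
# feature column is Y shifted down one step, zero-filled at the start, and
# named "lag_1_dim:0".
#
#   feats, names = fit_compute_lag(np.arange(4.0).reshape(4, 1), lag=[1])
#   # feats[:, 0] -> [0., 0., 1., 2.]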


def autocorr(x):
    """Compute the autocorrelation of x."""
    result = np.correlate(x, x, mode="full")
    return result[int(result.size / 2):]


def convolute_1D(array, filter=None):
    """Convolution by dimension for a 1D or 2D array using np.convolve.

    Args:
        array (np.array): array to convolve
        filter (np.array, optional): convolution filter. Defaults to None.

    Raises:
        ValueError: dimension error

    Returns:
        array: convolved array
    """
    if filter is None:
        return array
    else:
        array_convoluted = []
        if len(array.shape) == 1:
            array_convoluted = np.convolve(array, filter, "same")
        elif len(array.shape) == 2:
            for i in range(array.shape[1]):
                array_convoluted.append(
                    np.convolve(array[:, i], filter, "same")[:, None]
                )
            array_convoluted = np.concatenate(array_convoluted, axis=1)
        else:
            raise ValueError("Inappropriate array shape: expected a 1D or 2D array")
        return array_convoluted


def _merge_config(default_cfg, user_cfg):
    """Return a deep-merged copy of default_cfg updated with user_cfg."""
    if user_cfg is None:
        return copy.deepcopy(default_cfg)

    cfg = copy.deepcopy(default_cfg)
    for key, value in user_cfg.items():
        if (
            key in cfg
            and isinstance(cfg[key], dict)
            and isinstance(value, dict)
        ):
            cfg[key].update(value)
        else:
            cfg[key] = value
    return cfg
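
# Illustrative usage (added note): nested dicts are merged one level deep.
#
#   _merge_config({"a": 1, "b": {"x": 1, "y": 2}}, {"b": {"y": 3}})
#   # -> {"a": 1, "b": {"x": 1, "y": 3}}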


def _merge_nested(default_cfg, user_cfg):
    """Shallow-copy variant of _merge_config: merge user_cfg into default_cfg one level deep."""
    if user_cfg is None:
        return default_cfg.copy()
    cfg = default_cfg.copy()
    for k, v in user_cfg.items():
        if isinstance(v, dict) and isinstance(cfg.get(k), dict):
            sub = cfg[k].copy()
            sub.update(v)
            cfg[k] = sub
        else:
            cfg[k] = v
    return cfg