# uqmodels/utils.py -- coverage: 31% of 416 statements (coverage.py v7.13.0, 2025-12-09)

import copy
import math
import sys
from copy import deepcopy
from typing import Iterable

import numpy as np
import scipy
import tensorflow as tf
from joblib import Parallel, delayed
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler

EPSILON = sys.float_info.min  # small value to avoid underflow


def identity(*args):
    return args


def add_random_state(random_state, values):
    """Addition that tolerates a None random_state.

    Args:
        random_state (int or None): random state
        values (int): value to add

    Returns:
        int or None: random_state + values, or None if random_state is None
    """
    if random_state is None:
        return None
    return random_state + values


def generate_random_state(random_state=None):
    """Draw a random integer if random_state is None, else return random_state.

    Args:
        random_state (int or None, optional): random state. Defaults to None.

    Returns:
        int: random_state
    """
    if random_state is None:
        random_state = np.random.randint(0, 10000000)
    return random_state


def norm_signal(y):
    return (y - y.mean()) / y.std()


def cum_sum_consecutive_zero(array):
    """Count, for each index, the number of consecutive zeros ending there.

    Example: 1 0 1 0 0 0 -> 0 1 0 1 2 3

    Args:
        array: 1D array of 0/1 (or truthy) values

    Returns:
        np.ndarray: consecutive-zero counts, same length as array
    """
    len_ = len(array)
    inds = np.nonzero(array)[0]
    consecutive_count = np.zeros(len_)

    if len(inds) == 0:
        # All-zero array: counts grow from 1 to len_.
        return np.arange(1, len_ + 1, dtype=float)

    if not inds[0] == 0:
        consecutive_count[: inds[0]] += 1
        inds = np.concatenate([[0], inds])

    flag = np.ones(len_)
    cpt = 1
    flag[inds] = 0
    while flag.sum() > 0:
        inds = inds + 1
        if inds[-1] > len_ - 1:
            inds = inds[:-1]
        inds = inds[flag[inds] != 0]
        consecutive_count[inds] += cpt
        flag[inds] = 0
        cpt += 1
    return consecutive_count
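
# Illustrative check (not part of the original module), mirroring the docstring:
#     >>> cum_sum_consecutive_zero(np.array([1, 0, 1, 0, 0, 0]))
#     array([0., 1., 0., 1., 2., 3.])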


def threshold(array, min_val=None, max_val=None):
    """Clip an array with min_val as lower bound and max_val as upper bound.

    Supports multidimensional thresholds broadcast on the last dimension.

    Args:
        array: nd array to threshold
        min_val (scalar or array, optional): lower bound(s)
        max_val (scalar or array, optional): upper bound(s)
    """
    if min_val is not None:
        array = np.maximum(array, min_val)
    if max_val is not None:
        array = np.minimum(array, max_val)
    return array
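
# Illustrative sketch (not in the original module):
#     >>> threshold(np.array([-2.0, 0.5, 3.0]), min_val=0.0, max_val=1.0)
#     array([0. , 0.5, 1. ])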


def corr_matrix_array(m, a):
    """Correlate an array against each column of a matrix.

    Parameters
    ----------
    m: numpy array (n_samples, n_features)
    a: numpy array (n_samples,) of true values

    Returns
    -------
    c: numpy array
        correlation coefficients of a against each column of m
    """
    a = a.reshape(-1, 1)

    mean_t = np.mean(m, axis=0)
    std_t = np.std(m, axis=0)

    mean_i = np.mean(a, axis=0)
    std_i = np.std(a, axis=0)

    mean_xy = np.mean(m * a, axis=0)

    c = (mean_xy - mean_i * mean_t) / (std_i * std_t)
    return c
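
# Illustrative sketch (not in the original module): a perfectly correlated and a
# perfectly anti-correlated column.
#     >>> m = np.array([[1.0, 3.0], [2.0, 2.0], [3.0, 1.0]])
#     >>> corr_matrix_array(m, np.array([1.0, 2.0, 3.0]))
#     array([ 1., -1.])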


def mask_corr_feature_target(X, y, v_seuil=0.05):
    """
    Return a boolean array indicating which features show a max absolute
    correlation with any target column above the threshold.

    Parameters
    ----------
    X : ndarray (n_samples, n_features)
    y : ndarray (n_samples, n_targets)
    v_seuil : float
    """
    v_corr = np.abs(corr_matrix_array(X, y[:, 0]))
    for i in np.arange(y.shape[1] - 1):
        v_corr = np.maximum(v_corr, np.abs(corr_matrix_array(X, y[:, i + 1])))
    return v_corr > v_seuil


def coefficients_spreaded(X):
    """Return a log-spread grid of integer coefficients strictly below X."""
    coefficients = np.array(
        [1, 2, 3, 5, 8, 10, 15, 20, 25, 33, 50, 100, 250, 500,
         1000, 2500, 5000, 10000, 25000, 50000, 100000]
    )
    mask = coefficients < X
    return coefficients[mask]
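
# Illustrative sketch (not in the original module):
#     >>> coefficients_spreaded(30)
#     array([ 1,  2,  3,  5,  8, 10, 15, 20, 25])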


def Extract_dict(dictionaire, str_keys):
    """Extract the values of str_keys from a dict (unwrapped if single key)."""
    list_extract = []
    for str_name in str_keys:
        list_extract.append(dictionaire[str_name])

    if len(list_extract) == 1:
        list_extract = list_extract[0]
    return list_extract


def sizeof_fmt(num, suffix="B"):
    """by Fred Cirera, https://stackoverflow.com/a/1094933/1870254, modified"""
    for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
        if abs(num) < 1024.0:
            return f"{num:3.1f} {unit}{suffix}"
        num /= 1024.0
    return f"{num:.1f} Yi{suffix}"
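
# Illustrative sketch (not in the original module):
#     >>> sizeof_fmt(1536)
#     '1.5 KiB'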


def track_memory(string):
    # Track memory leakage: print the ten largest global objects.
    print("Tracking_memory : " + string)
    for name, size in sorted(
        ((name, sys.getsizeof(value)) for name, value in list(globals().items())),
        key=lambda x: -x[1],
    )[:10]:
        print(f"{name:>30}: {sizeof_fmt(size):>8}")


def propagate(bool_array, n_prop=1, inv=False, sym=False):
    # Propagate True values of a boolean flag to the previous and/or following index.
    # Independent ifs (not elif), so sym=True propagates in both directions.
    for _ in range(n_prop):
        if inv or sym:
            bool_array = bool_array | np.roll(bool_array, -1)
        if (not inv) or sym:
            bool_array = bool_array | np.roll(bool_array, 1)
    return bool_array
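
# Illustrative sketch (not in the original module): default propagation to the
# following index.
#     >>> propagate(np.array([False, False, True, False, False]))
#     array([False, False,  True,  True, False])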


def expand_flag(flag):
    # Propagate True values of a boolean flag to the previous and following index.
    return (np.roll(flag, 1) + np.roll(flag, 0) + np.roll(flag, -1)) > 0


def get_coeff(alpha):
    return scipy.stats.norm.ppf(alpha, 0, 1)


def flatten(y):
    if y is not None:
        if len(y.shape) == 2 and y.shape[1] == 1:
            y = y[:, 0]
    return y


# Folding functions


def get_fold_nstep(size_window, size_subseq, padding):
    # Compute the number of steps from the window size, sub-sequence size and padding.
    return int(np.floor((size_window - size_subseq) / (padding))) + 1
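
# Illustrative sketch (not in the original module): a window of 10 split into
# sub-sequences of 4 with padding 2 yields 4 steps.
#     >>> get_fold_nstep(10, 4, 2)
#     4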


# TODO: support padding to limit train redundancy
def stack_and_roll(array, horizon, lag=0, seq_idx=None, step=1):
    """Stack an array "horizon" times and roll it into temporal stairs.

    Args:
        array: 2D array (n_samples, dim) to stack and roll
        horizon: stacking depth (number of lagged copies)
        lag (int, optional): global lag offset. Defaults to 0.
        seq_idx (optional): sequence identifiers; rolling is done per block.
            Defaults to None.
        step (int, optional): keep one sample every `step`. Defaults to 1.

    Returns:
        (n+1)-dimensional array whose kth slot holds the input rolled k times
    """
    shape = (1, horizon, 1)
    new_array = np.tile(array[:, None], shape)

    if seq_idx is None:
        for i in np.arange(horizon):
            lag_current = horizon - lag - i - 1
            new_array[:, i] = np.roll(new_array[:, i], lag_current, axis=0)

        # Erase procedure: zero out entries that wrapped around.
        for i in np.arange(horizon):
            lag_current = horizon - lag - i - 1
            if lag_current > 0:
                new_array[i, :lag_current] = 0

    # By blocks:
    else:
        _, idx = np.unique(seq_idx, return_index=True)
        for j in seq_idx[np.sort(idx)]:
            flag = seq_idx == j
            list_idx = np.nonzero(flag)[0]
            for i in np.arange(horizon):
                lag_current = horizon - lag - i - 1
                new_array[flag, i] = np.roll(new_array[flag, i], lag_current, axis=0)
            # To optimize: get first non-zero values.
            for i in np.arange(horizon):
                lag_current = horizon - lag - i - 1
                if lag_current > 0:
                    new_array[list_idx[i], :lag_current] = 0
    if step > 1:
        mask = (np.arange(len(array)) % step) == step - 1
        new_array = new_array[mask]
    return new_array
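
# Illustrative sketch (not in the original module): each output row stacks the
# previous and current value; wrapped entries are zeroed.
#     >>> stack_and_roll(np.arange(4.0)[:, None], horizon=2)[:, :, 0]
#     array([[0., 0.],
#            [0., 1.],
#            [1., 2.],
#            [2., 3.]])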


def stack_and_roll_layer(inputs, size_window, size_subseq, padding, name=""):
    # TensorFlow sliding-window stacking: extract n_step overlapping sub-sequences.
    n_step = get_fold_nstep(size_window, size_subseq, padding)
    x = tf.map_fn(
        lambda i: inputs[:, (i * padding): (i * padding) + size_subseq, :],
        tf.range(n_step),
        fn_output_signature=tf.float32,
    )
    x = tf.transpose(x, [1, 0, 2, 3])
    return x


def apply_middledim_reduction(ndarray, reduc_filter=None, roll=0):
    """Reduce the middle dimension by a weighted mean (reduc_filter), or do nothing.

    Args:
        ndarray (np.array): object to reduce
        reduc_filter (np.array): weights for the weighted-mean reduction

    Return:
        reduced_ndarray: reduced object
    """
    ndarray = deepcopy(ndarray)
    if reduc_filter is None:
        return ndarray

    reduc_filter = np.array(reduc_filter)

    dim_g = ndarray.shape[-1]
    list_reduced_ndarray_by_dim = []
    # For each dimension of the score
    for i in range(dim_g):
        # Incompatibility test:
        if len(reduc_filter) != ndarray.shape[1]:
            print(
                "error: expected reduc_filter size ",
                str(ndarray.shape[1]),
                ", current size: ",
                str(len(reduc_filter)),
            )
            break

        filt = reduc_filter / reduc_filter.sum()
        ndarray[:, :, i] = ndarray[:, :, i] * filt
        if roll:
            reduced_ndarray = ndarray[:, 0, i]
            for j in range(ndarray.shape[1] - 1):
                reduced_ndarray += np.roll(ndarray[:, j + 1, i], (j + 1) * roll, axis=0)
        else:
            reduced_ndarray = ndarray[:, :, i].sum(axis=1)

        list_reduced_ndarray_by_dim.append(reduced_ndarray[:, None])
    reduced_ndarray = np.concatenate(list_reduced_ndarray_by_dim, axis=1)
    return reduced_ndarray
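
# Illustrative sketch (not in the original module): uniform weights average the
# middle dimension.
#     >>> x = np.arange(8.0).reshape(4, 2, 1)
#     >>> apply_middledim_reduction(x, reduc_filter=[1, 1])[:, 0]
#     array([0.5, 2.5, 4.5, 6.5])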


def apply_mask(list_or_array_or_none, mask, axis=0, mode="bool_array"):
    """Select a subpart of an array/list_of_arrays/tuple using a mask and np.take.

    If list_or_array_or_none is an array, apply the selection directly on it;
    if it is a tuple or list of arrays, apply it to each array in the structure.

    Args:
        list_or_array_or_none: ND array, or list/tuple of ND arrays, or None
        mask: mask as a boolean array or an array of indices
        axis (int, optional): axis of the sub-arrays to select along. Defaults to 0.
        mode (str, optional): if 'bool_array', turn the boolean mask into an
            indices array; otherwise mask is already indices. Defaults to 'bool_array'.

    Returns:
        (list, tuple, ndarray or None): sub-selected structure
    """
    if mode == "bool_array":
        indices = np.arange(len(mask))[mask]
    else:
        indices = mask

    if type(list_or_array_or_none) in [list, tuple]:
        return [np.take(array, indices, axis=axis) for array in list_or_array_or_none]
    if list_or_array_or_none is None:
        return None
    else:
        return np.take(list_or_array_or_none, indices, axis=axis)
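
# Illustrative sketch (not in the original module):
#     >>> apply_mask(np.arange(5), np.array([True, False, True, False, True]))
#     array([0, 2, 4])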


def base_cos_freq(array, freq=[2]):
    """Transform 1D modulo features in [1, N] into cyclic (cos, sin) features for the given freq."""
    features = []
    for i in freq:
        features.append(np.cos(i * math.pi * array))
        features.append(np.sin(i * math.pi * array))
    return np.concatenate(features).reshape(len(freq) * 2, -1).T


# Preprocessing functions


def apply_conv(score, filt=None, reduc_filter=None):
    """Naively apply a 1D convolution to score, dimension by dimension, using filt.

    Args:
        score (np.array): score to convolve (2D or 3D)
        filt (np.array): filter to apply

    Returns:
        score: convolved score
    """
    if filt is None:
        return score

    if len(score.shape) == 3:
        # For each dimension of the score
        for t in range(score.shape[1]):
            for i in range(score.shape[2]):
                score[:, t, i] = np.convolve(score[:, t, i], filt, mode="same")
    else:
        dim_g = score.shape[-1]
        # For each dimension of the score
        for i in range(dim_g):
            score[:, i] = np.convolve(score[:, i], filt, mode="same")
    return score


def cut(values, cut_min, cut_max):
    """Clip values at given minimal and maximal percentiles.

    Args:
        values: values to cut
        cut_min: percentile of the minimal cut threshold
        cut_max: percentile of the maximal cut threshold

    Returns:
        values clipped to the [cut_min, cut_max] quantile range (or None)
    """
    if values is None:
        return None

    vpmin = np.quantile(values, cut_min, axis=0)
    vpmax = np.quantile(values, cut_max, axis=0)
    values = np.minimum(values, vpmax)
    values = np.maximum(values, vpmin)
    return values
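
# Illustrative sketch (not in the original module): clip to the [0.25, 0.75]
# quantile range.
#     >>> cut(np.arange(5.0), 0.25, 0.75)
#     array([1., 1., 2., 3., 3.])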


def format_data(self, X, y, fit=False, mode=None, flag_inverse=False):
    """Feature and target formatting"""
    if self.rescale:
        flag_reshape = False
        if y is not None:
            if len(y.shape) == 1:
                flag_reshape = True
                y = y[:, None]

        if fit:
            scalerX = StandardScaler(with_mean=True, with_std=True)
            X = scalerX.fit_transform(X)
            scalerY = StandardScaler(with_mean=True, with_std=True)
            y = scalerY.fit_transform(y)
            self.scaler = [scalerX, scalerY]

        elif not flag_inverse:
            if X is not None:
                X = self.scaler[0].transform(X)
            if y is not None:
                y = self.scaler[1].transform(y)
        else:
            X_transformer = self.scaler[0].inverse_transform
            Y_transformer = self.scaler[1].inverse_transform
            if X is not None and not len(X) == 0:
                X = X_transformer(X)

            if y is not None and not len(y) == 0:
                sigma = np.sqrt(self.scaler[1].var_)
                if mode in ("sigma", "var", "var_A&E"):
                    y = y * sigma

                elif mode == "2sigma":
                    y_reshape = np.moveaxis(y, -1, 0)
                    if len(y.shape) == 3:
                        y = np.concatenate(
                            [
                                np.expand_dims(i, 0)
                                for i in [y_reshape[0] * sigma, y_reshape[1] * sigma]
                            ],
                            axis=0,
                        )
                    else:
                        y = np.concatenate(
                            [
                                y_reshape[0] * sigma,
                                y_reshape[1] * sigma,
                            ],
                            axis=-1,
                        )
                elif mode == "quantile":
                    y_bot = Y_transformer(y[:, 0])
                    y_top = Y_transformer(y[:, 1])
                    y = np.concatenate([y_bot, y_top], axis=1)

                else:
                    y = Y_transformer(y)

        if flag_reshape:
            y = y[:, 0]
    return (X, y)


# PI basic computation functionalities


def compute_born(y_pred, sigma, alpha, mode="sigma"):
    """Compute y_lower and y_upper bounds from a gaussian hypothesis (sigma or 2sigma).

    Args:
        y_pred (array): mean prediction
        sigma (array): standard-deviation (scale) estimation
        alpha (float): miscoverage ratio
        mode (str): distribution hypothesis
            (sigma: gaussian residual hypothesis; 2sigma: separate gaussian
            hypotheses for positive and negative residuals)

    Returns:
        (y_lower, y_upper): lower and upper boundary of the predictive interval
    """
    # Case sigma
    if mode == "sigma":
        y_lower = y_pred + scipy.stats.norm.ppf((alpha / 2), 0, sigma)
        y_upper = y_pred + scipy.stats.norm.ppf((1 - (alpha / 2)), 0, sigma)
    # Case 2 sigma
    elif mode == "2sigma":
        sigma = np.moveaxis(sigma, -1, 0)
        y_lower = y_pred + scipy.stats.norm.ppf((alpha / 2), 0, sigma[0])
        y_upper = y_pred + scipy.stats.norm.ppf((1 - (alpha / 2)), 0, sigma[1])
    else:
        raise NotImplementedError("This mode is not yet implemented.")
    return y_lower, y_upper
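
# Illustrative sketch (not in the original module): with sigma = 1 and alpha = 0.05
# the bounds are roughly +/- 1.96.
#     >>> compute_born(np.zeros(1), np.ones(1), 0.05)
#     (array([-1.95996398]), array([1.95996398]))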


# Gaussian mixture quantile estimation
def mixture_quantile(pred, var_A, quantiles, n_jobs=5):
    def aux_mixture_quantile(pred, var_A, quantiles):
        list_q = []
        n_data = pred.shape[1]
        n_mixture = pred.shape[0]
        for n in range(n_data):
            mean_law = scipy.stats.norm(
                pred[:, n, 0].mean(), np.sqrt(var_A[:, n, 0].mean())
            )
            xmin = mean_law.ppf(0.0000001)
            xmax = mean_law.ppf(0.9999999)
            scale = np.arange(xmin, xmax, (xmax - xmin) / 300)
            Mixture_cdf = np.zeros(len(scale))
            for i in range(n_mixture):
                cur_law = scipy.stats.norm(pred[i, n, 0], np.sqrt(var_A[i, n, 0]))
                Mixture_cdf += cur_law.cdf(scale) / n_mixture
            q_val = []
            for q in quantiles:
                q_val.append(scale[np.abs(Mixture_cdf - q).argmin()])
            list_q.append(q_val)
        return np.array(list_q)

    n_data = pred.shape[1]
    parallel_partition = np.array_split(np.arange(n_data), 3)
    # Split inputs for the auxiliary parallel quantile extraction
    parallel_input = []
    for partition in parallel_partition:
        parallel_input.append((pred[:, partition], var_A[:, partition], quantiles))
    list_q = Parallel(n_jobs=n_jobs)(
        delayed(aux_mixture_quantile)(*inputs) for inputs in parallel_input
    )
    return np.concatenate(list_q, axis=0)


# Scikit-learn tuning function
def aux_tuning(
    model,
    X,
    Y,
    params=None,
    score="neg_mean_squared_error",
    n_esti=100,
    folds=4,
    random_state=None,
):
    """Random search with sequential k-splits.

    Args:
        model (scikit-learn estimator): estimator
        X: features
        Y: target
        params (dict, optional): parameter grid. Defaults to None.
        score (str, optional): scoring. Defaults to 'neg_mean_squared_error'.
        n_esti (int, optional): number of grid draws. Defaults to 100.
        folds (int, optional): number of sequential folds. Defaults to 4.
        random_state (int or None): handle experimental randomness using seeds.
    """
    if params is None:
        return model
    tscv = TimeSeriesSplit(n_splits=folds)
    random_search = RandomizedSearchCV(
        model,
        param_distributions=params,
        n_iter=n_esti,
        scoring=score,
        n_jobs=8,
        cv=tscv.split(X),
        verbose=1,
        random_state=random_state,
    )
    Y = np.squeeze(Y)
    random_search.fit(X, Y)
    return random_search.best_estimator_
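
# Illustrative usage sketch (not in the original module); the estimator, the grid
# and the X/y arrays are placeholders.
#     >>> from sklearn.ensemble import RandomForestRegressor
#     >>> grid = {"n_estimators": [50, 100], "max_depth": [3, 5, None]}
#     >>> best = aux_tuning(RandomForestRegressor(), X, y, params=grid, n_esti=6)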


def agg_list(list_: Iterable):
    """Concatenate a list of arrays along axis 0 (None if impossible)."""
    try:
        return np.concatenate(list_, axis=0)
    except ValueError:
        return None


def agg_func(list_: Iterable):
    """Average a list of arrays along axis 0 (None on type error)."""
    try:
        return np.mean(list_, axis=0)
    except TypeError:
        return None


class GenericCalibrator:
    def __init__(self, type_res="res", mode="symetric", name=None, alpha=0.1):
        """Generic calibrator implementing several calibration schemes.

        Args:
            type_res (str, optional): type of score
                "no_calib": no calibration
                "res": calibration based on mean residuals
                "w_res": calibration based on weighted mean residuals
                "cqr": calibration based on quantile residuals

            mode (str, optional):
                if "symetric", perform symmetric calibration;
                else perform calibration independently on positive and negative residuals.
            name (str, optional): calibrator name. Defaults to None.
            alpha (float, optional): miscoverage ratio. Defaults to 0.1.
        """
        super().__init__()
        self._residuals = None
        self.name = name
        if name is None:
            self.name = type_res + "_" + mode
        self.mode = mode
        self.alpha = alpha
        self.type_res = type_res

        if mode == "symetric":
            self.fcorr = 1
        else:
            self.fcorr_lower = 1
            self.fcorr_upper = 1

    def estimate(
        self, y_true, y_pred, y_pred_lower, y_pred_upper, sigma_pred, **kwargs
    ):
        flag_res_lower = np.zeros(y_true.shape)
        flag_res_upper = np.zeros(y_true.shape)

        if self.type_res in ("res", "w_res"):
            if self.type_res == "res":
                residuals = y_true - y_pred
                sigma_pred = y_true * 0 + 1

            if self.type_res == "w_res":
                # sigma_pred = np.maximum(y_pred_upper - y_pred_lower, EPSILON) / 2
                residuals = y_true - y_pred

            if len(residuals.shape) == 1:
                flag_res_lower = residuals <= 0
                flag_res_upper = residuals >= 0

            else:
                # Stack per-target flags along the last axis so the masks keep
                # the (n_samples, n_targets) shape of residuals.
                flag_res_lower = np.concatenate(
                    [
                        np.expand_dims(residuals[:, i] <= 0, -1)
                        for i in range(0, y_true.shape[1])
                    ],
                    axis=-1,
                )
                flag_res_upper = np.concatenate(
                    [
                        np.expand_dims(residuals[:, i] >= 0, -1)
                        for i in range(0, y_true.shape[1])
                    ],
                    axis=-1,
                )

            if y_pred.shape != sigma_pred.shape:
                sigma_pred = np.moveaxis(sigma_pred, -1, 0)
                residuals[flag_res_lower] = (
                    np.abs(residuals)[flag_res_lower] / (sigma_pred[0])[flag_res_lower]
                )
                residuals[flag_res_upper] = (
                    np.abs(residuals)[flag_res_upper] / (sigma_pred[1])[flag_res_upper]
                )
            else:
                residuals = np.abs(residuals) / (sigma_pred)

        elif self.type_res == "cqr":
            residuals = np.maximum(y_pred_lower - y_true, y_true - y_pred_upper)
            flag_res_lower = (y_pred_lower - y_true) >= (y_true - y_pred_upper)
            flag_res_upper = (y_pred_lower - y_true) <= (y_true - y_pred_upper)

        elif self.type_res == "no_calib":
            return

        else:
            print("Unknown type_res")
            return

        if self.mode == "symetric":
            self.fcorr = np.quantile(
                residuals, (1 - self.alpha) * (1 + 1 / len(residuals))
            )

        else:
            self.fcorr_lower = np.quantile(
                residuals[flag_res_lower],
                np.minimum((1 - self.alpha) * (1 + 1 / flag_res_lower.sum()), 1),
            )

            self.fcorr_upper = np.quantile(
                residuals[flag_res_upper],
                np.minimum((1 - self.alpha) * (1 + 1 / flag_res_upper.sum()), 1),
            )
        return

    def calibrate(self, y_pred, y_pred_lower, y_pred_upper, sigma_pred, **kwargs):
        if self.type_res == "res":
            sigma_pred = y_pred * 0 + 1

        if self.mode == "symetric":
            fcorr_lower = self.fcorr
            fcorr_upper = self.fcorr
        else:
            fcorr_lower = self.fcorr_lower
            fcorr_upper = self.fcorr_upper

        if self.type_res in ["res", "w_res", "no_calib"]:
            if y_pred.shape != sigma_pred.shape:
                sigma_pred = np.moveaxis(sigma_pred, -1, 0)
                y_pred_lower = y_pred - sigma_pred[0] * fcorr_lower
                y_pred_upper = y_pred + sigma_pred[1] * fcorr_upper
            else:
                y_pred_lower = y_pred - sigma_pred * fcorr_lower
                y_pred_upper = y_pred + sigma_pred * fcorr_upper

        elif self.type_res in ["cqr"]:
            y_pred_upper = y_pred_upper + fcorr_upper
            y_pred_lower = y_pred_lower - fcorr_lower
        else:
            print("Unknown type_res")
            return

        return y_pred_lower, y_pred_upper
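
# Illustrative usage sketch (not in the original module): CQR-style calibration on
# held-out predictions, then widened intervals at prediction time; the y_cal,
# y_hat, y_low, y_up and sigma_hat arrays are placeholders.
#     >>> cal = GenericCalibrator(type_res="cqr", alpha=0.1)
#     >>> cal.estimate(y_cal, y_hat, y_low, y_up, sigma_hat)
#     >>> y_low_c, y_up_c = cal.calibrate(y_hat, y_low, y_up, sigma_hat)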


def dimensional_reduction(data, mode="umap", reducer=None, fit=True, **kwargs):
    if "n_components" not in kwargs.keys():
        kwargs["n_components"] = 3

    if reducer is None:
        if mode == "umap":
            if "n_neighbors" not in kwargs:
                kwargs["n_neighbors"] = 50
            if "min_dist" not in kwargs:
                kwargs["min_dist"] = 0.01
            # umap support is disabled: the umap import is commented out upstream.
            # reducer = umap.UMAP(**kwargs)
            raise NotImplementedError("umap reducer is not available")

        elif mode == "tsne":
            reducer = TSNE(**kwargs)

        elif mode == "pca":
            reducer = PCA(**kwargs)

    embedding = reducer.fit_transform(data)
    return (embedding, reducer)


# Moving-average function for lag feature extraction


def fit_compute_lag(Y, lag=[1, 2, 3], delay=0):
    """Create lag features from a numerical array.

    Args:
        Y (float array): target to extract lag features from
        lag (list of int, optional): lags to compute. Defaults to [1, 2, 3].
        delay (int, optional): delay before the first lag feature. Defaults to 0.
    """
    dim = Y.shape
    new_features_list = []
    new_features_name = []
    for i in np.array(lag) + delay:
        Y_cur = np.roll(Y, i, axis=0)
        if i > 0:
            Y_cur[0:i] = 0
        for g in range(dim[1]):
            new_features_list.append(Y_cur[:, g])
            new_features_name.append("lag_" + str(i) + "_dim:" + str(g))
    new_features_name = np.array(new_features_name)
    return (np.array(new_features_list).T, new_features_name)
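
# Illustrative sketch (not in the original module): two lagged copies of a single
# series, with names encoding lag and dimension.
#     >>> feats, names = fit_compute_lag(np.arange(4.0)[:, None], lag=[1, 2])
#     >>> feats
#     array([[0., 0.],
#            [0., 0.],
#            [1., 0.],
#            [2., 1.]])
#     >>> names
#     array(['lag_1_dim:0', 'lag_2_dim:0'], dtype='<U11')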


def autocorr(x):
    """Compute the autocorrelation of x."""
    result = np.correlate(x, x, mode="full")
    return result[result.size // 2:]


def convolute_1D(array, filter=None):
    """Convolve a 1D or 2D array dimension by dimension using np.convolve.

    Args:
        array: array to convolve
        filter (optional): convolution filter. Defaults to None.

    Raises:
        ValueError: dimension error

    Returns:
        array: convolved array
    """
    if filter is None:
        return array
    else:
        array_convoluted = []
        if len(array.shape) == 1:
            array_convoluted = np.convolve(array, filter, "same")
        elif len(array.shape) == 2:
            for i in range(array.shape[1]):
                array_convoluted.append(
                    np.convolve(array[:, i], filter, "same")[:, None]
                )
            array_convoluted = np.concatenate(array_convoluted, axis=1)
        else:
            raise ValueError("Inappropriate array shape: expected a 1D or 2D array")
        return array_convoluted


def _merge_config(default_cfg, user_cfg):
    """Return a deep-merged copy of default_cfg updated with user_cfg."""
    if user_cfg is None:
        return copy.deepcopy(default_cfg)

    cfg = copy.deepcopy(default_cfg)
    for key, value in user_cfg.items():
        if (
            key in cfg
            and isinstance(cfg[key], dict)
            and isinstance(value, dict)
        ):
            cfg[key].update(value)
        else:
            cfg[key] = value
    return cfg
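
# Illustrative sketch (not in the original module): nested dicts are merged one
# level deep, scalars are overridden.
#     >>> _merge_config({"opt": {"lr": 0.1, "mom": 0.9}, "seed": 0}, {"opt": {"lr": 0.01}})
#     {'opt': {'lr': 0.01, 'mom': 0.9}, 'seed': 0}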


def _merge_nested(default_cfg, user_cfg):
    """Shallow-copy variant of _merge_config: merge user_cfg into default_cfg one level deep."""
    if user_cfg is None:
        return default_cfg.copy()
    cfg = default_cfg.copy()
    for k, v in user_cfg.items():
        if isinstance(v, dict) and isinstance(cfg.get(k), dict):
            sub = cfg[k].copy()
            sub.update(v)
            cfg[k] = sub
        else:
            cfg[k] = v
    return cfg