Coverage for uqmodels/preprocessing/features_processing.py: 10%
369 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-05 14:29 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-05 14:29 +0000
1"""
2Data preprocessing module.
3"""
5import inspect
7import numpy as np
8import pandas as pd
9import tsfresh
10from sklearn.decomposition import PCA
11from sklearn.ensemble import RandomForestRegressor
12from sklearn.preprocessing import StandardScaler
13from tsfresh import extract_features
14from tsfresh.feature_extraction import EfficientFCParameters
16import uqmodels.preprocessing.features_processing as f_proc
17from uqmodels.preprocessing.preprocessing import downscale_series, upscale_series
18from uqmodels.utils import base_cos_freq, convolute_1D, corr_matrix_array
20# import uqmodels.test as UQ_test
def check_transform_input_to_panda(input, name=""):
    """Ensure input is a pd.DataFrame.

    A np.ndarray is converted to a DataFrame; any other type raises.

    Args:
        input: object to check or transform.
        name (str, optional): name of the input, used in the error message.

    Raises:
        TypeError: if input is neither np.ndarray nor pd.DataFrame.

    Returns:
        pd.DataFrame: the input as a DataFrame.
    """
    if isinstance(input, np.ndarray):
        return pd.DataFrame(input)

    if not isinstance(input, pd.DataFrame):
        caller = inspect.stack()[1].function
        # Bugfix: `name + "..." + type(input)` concatenated str with a type
        # object and raised its own TypeError before the intended one.
        raise TypeError(
            f"Type issue in {caller}: {name} should be pd.DataFrame "
            f"rather than {type(input).__name__}"
        )
    return input
def normalise_panda(dataframe, mode, scaler=None):
    """Apply standard-scaling on a dataframe.

    Args:
        dataframe (pd.DataFrame): data to normalise.
        mode (str): "fit" returns a fitted scaler only;
            "fit_transform" fits a new scaler and returns (dataframe, scaler);
            "transform" applies the provided, already-fitted scaler.
        scaler (StandardScaler, optional): required for mode="transform".

    Returns:
        StandardScaler | (pd.DataFrame, StandardScaler) | pd.DataFrame,
        depending on mode.

    Raises:
        ValueError: if mode is not one of the three supported values.
    """
    if mode == "fit":
        scaler = StandardScaler()
        scaler.fit(dataframe.values)
        return scaler
    if mode == "fit_transform":
        scaler = StandardScaler()
        values = scaler.fit_transform(dataframe.values)
        dataframe = pd.DataFrame(
            values, columns=dataframe.columns, index=dataframe.index
        )
        return (dataframe, scaler)
    if mode == "transform":
        # Bugfix: previously called scaler.fit_transform, re-fitting the
        # scaler on the new data and defeating the passed fitted scaler.
        values = scaler.transform(dataframe.values)
        dataframe = pd.DataFrame(
            values, columns=dataframe.columns, index=dataframe.index
        )
        return dataframe
    raise ValueError("mode should be 'fit', 'fit_transform' or 'transform'")
78# Target selection from data :
def select_tsfresh_params(
    list_keys=("variance", "skewness", "fft", "cwt", "fourrier", "mean", "trend")
):
    """Subset of tsfresh EfficientFCParameters whose name contains a key.

    Args:
        list_keys (iterable of str): substrings used to match parameter names.
            Bugfix note: the previous default contained `"mean" "trend"`
            (implicit string concatenation -> "meantrend"), which could not
            match either feature family.

    Returns:
        dict: tsfresh feature-calculator parameters.
    """
    dict_params = dict()
    # Hoisted out of the loop: the original rebuilt EfficientFCParameters()
    # on every inner iteration.
    efficient_params = EfficientFCParameters()
    for key in list_keys:
        for k in efficient_params.keys():
            if key in k:
                dict_params[k] = efficient_params[k]
    return dict_params
def select_data(data, context=None, ind_data=None, ind_context=None, **kwargs):
    """Select data (and optionally context) columns from indice arrays.

    Args:
        data (ndarray): data
        context (ndarray, optional): contextual data. Defaults to None.
        ind_data (ind_array, optional): selected data columns.
            Defaults to None: all dims are picked.
        ind_context (ind_array, optional): selected context columns.
            Defaults to None: no context columns picked.

    Returns:
        ndarray: np.concatenation of all selected features.
    """
    # Bugfix: context and ind_context were previously dropped
    # (context=None hard-coded, ind_context swallowed by **kwargs),
    # so callers passing them silently got data-only selection.
    data_selected = select_data_and_context(
        data, context=context, ind_data=ind_data, ind_context=ind_context
    )
    return data_selected
108# ----------------------------------------- #
109# PCA transformation form data & context
def select_data_and_context(
    data, context=None, ind_data=None, ind_context=None, **kwargs
):
    """Select columns from data and context using ind_data & ind_context.

    Args:
        data (ndarray or pd.DataFrame): data
        context (ndarray or pd.DataFrame, optional): contextual data.
            Defaults to None.
        ind_data (ind_array, optional): columns to pick from data.
            Defaults to None: no data columns picked.
        ind_context (ind_array, optional): columns to pick from context.
            Defaults to None: no context columns picked.

    Returns:
        ndarray: np.concatenation of all selected features; when nothing
        was selected, the whole data is returned.
    """
    data = check_transform_input_to_panda(data, "data")

    if context is not None:
        # Bugfix: the error label was previously "data".
        context = check_transform_input_to_panda(context, "context")

    data_selected = []
    if ind_data is not None:
        data_selected.append(data.loc[:, ind_data])
    if ind_context is not None:
        data_selected.append(context.loc[:, ind_context])

    if len(data_selected) == 0:
        data_selected = data
    else:
        data_selected = pd.concat(data_selected, axis=1)
    return data_selected.values
def fit_pca(
    data,
    context=None,
    n_components=3,
    data_lag=1,
    ind_data=None,
    ind_context=None,
    **kwargs
):
    """Fit step of PCA feature generation: fit a PCA on selected data & context.

    Args:
        data (ndarray): data
        context (ndarray, optional): contextual data. Defaults to None.
        n_components (int, optional): number of PCA components. Defaults to 3.
        data_lag (int, optional): lag applied to data before reduction.
        ind_data (ind_array, optional): selected data columns.
        ind_context (ind_array, optional): selected context columns.

    Returns:
        PCA: the fitted PCA model.
    """
    lagged = np.roll(data, data_lag, axis=0)
    selection = select_data_and_context(
        data=lagged,
        context=context,
        ind_data=ind_data,
        ind_context=ind_context,
        **kwargs
    )
    pca = PCA(n_components=n_components)
    pca.fit(selection)
    return pca
def compute_pca(
    data,
    context=None,
    n_components=3,
    data_lag=1,
    ind_data=None,
    ind_context=None,
    params_=None,
    **kwargs
):
    """Compute step of PCA feature generation.

    Applies a fitted PCA to the selected (lagged) data & context;
    when params_ is None, a PCA is fitted first via fit_pca.

    Args:
        data (ndarray): data
        context (ndarray, optional): contextual data. Defaults to None.
        n_components (int, optional): number of PCA components. Defaults to 3.
        data_lag (int, optional): lag applied to data before reduction.
        ind_data (ind_array, optional): selected data columns.
        ind_context (ind_array, optional): selected context columns.
        params_ (PCA, optional): already-fitted PCA model.

    Returns:
        (data_reduced, PCA_model)
    """
    if params_ is not None:
        reducer = params_
    else:
        reducer = fit_pca(
            data,
            context=context,
            n_components=n_components,
            data_lag=data_lag,
            ind_data=ind_data,
            ind_context=ind_context,
            **kwargs
        )

    lagged = np.roll(data, data_lag, axis=0)
    selection = select_data_and_context(
        data=lagged, context=context, ind_data=ind_data, ind_context=ind_context
    )
    return reducer.transform(selection), reducer
def automatic_periods_detection(array):
    """Detect the dominant periodicity (in samples) of a 1-D series.

    Bugfix: the previous implementation called the unbound
    pd.Series.autocorr on an ndarray (AttributeError) and took argmax of
    a scalar. This version computes the full autocorrelation function and
    returns the lag (>= 1, up to half the series length) with the highest
    autocorrelation.

    Args:
        array (1-D float array): series to analyse.

    Returns:
        int: detected period in samples (1 when undetectable).
    """
    x = np.asarray(array, dtype=float).ravel()
    x = x - x.mean()
    n = len(x)
    max_lag = n // 2
    if max_lag < 2:
        return 1
    acf = np.correlate(x, x, mode="full")[n - 1:]
    if acf[0] == 0:
        # zero-variance series: no periodicity detectable
        return 1
    acf = acf / acf[0]
    # skip lag 0 (always the maximum); search up to half the series
    return int(np.argmax(acf[1:max_lag]) + 1)
def fit_compute_periods(
    data,
    context=None,
    ind_data=None,
    ind_context=None,
    periodicities=[1],
    freqs=[1],
    params_=None,
    **kwargs
):
    """Turn a step_scale context column into cos/sin periodic features.

    Args:
        data: data (only used for the selection call).
        context: contextual data holding the step_scale column.
        ind_context: indices of the step_scale column in context.
        periodicities (list): modularities applied to the step scale.
        freqs (list, optional): frequencies of sin/cos. Defaults to [1].
        params_: unused, returned as-is (stateless step).

    Returns:
        (features, params_): concatenated periodic features.
    """
    step_scale = select_data_and_context(
        data=data, context=context, ind_data=None, ind_context=ind_context, **kwargs
    )

    per_period = [
        base_cos_freq((step_scale % period) / period, freqs)
        for period in periodicities
    ]
    features = np.concatenate(per_period, axis=1)

    return (features, params_)
def fit_compute_lag_values(
    data,
    context=None,
    ind_data=None,
    ind_context=None,
    derivs=[0],
    windows=[1],
    lag=[0],
    delay=0,
    params=None,
    **kwargs
):
    """Build lagged moving-average/derivative features from selected columns.

    Chains fit_compute_MA_derivate (windows, derivs) with fit_compute_lag
    (lag, delay) on the selected data/context columns.

    Args:
        data: data array/dataframe.
        context (optional): contextual data.
        ind_data / ind_context (optional): column selectors.
        derivs (list): derivative orders.
        windows (list): moving-average window sizes.
        lag (list): lags to apply.
        delay (int): delay added before each lag.
        params: unused, returned as-is (stateless step).

    Returns:
        (lag_features, params)
    """
    selection = select_data_and_context(
        data=data, context=context, ind_data=ind_data, ind_context=ind_context, **kwargs
    )
    smoothed, _ = fit_compute_MA_derivate(selection, derivs=derivs, windows=windows)
    features, _ = fit_compute_lag(smoothed, lag=lag, delay=delay)
    return (features, params)
def fit_compute_MA_derivate(
    data,
    context=None,
    ind_data=None,
    ind_context=None,
    windows=[1],
    lags=[0],
    derivs=[0],
    params=None,
    **kwargs
):
    """Compute a MA-values of the window last values, then apply lags, then derivates and returns values.
    Apply a 1-lag by default

    Args:
        data (ndarray or pd.DataFrame): data of shape (n_steps, n_dim).
        context (optional): contextual data, forwarded to the selection step.
        ind_data / ind_context (optional): column selectors forwarded to
            select_data_and_context.
        windows (list of int): moving-average window sizes.
        lags (list of int): shifts applied after the moving average.
        derivs (list of int): differentiation orders applied after the lag.
        params: unused, returned as-is for API symmetry with the other
            fit_compute_* functions.

    Returns:
        (deriv_features, params): features of shape
        (n_steps, n_windows * n_lags * n_derivs * n_dim).
    """
    data = select_data_and_context(
        data=data, context=context, ind_data=ind_data, ind_context=ind_context, **kwargs
    )

    dim = data.shape
    features = []
    for w in windows:
        # Align output on the last value of the window (np.roll wraps the
        # first w-1 rows around — NOTE(review): head values mix end-of-series
        # data; confirm this is acceptable for the callers).
        conv_array = np.roll(data, w - 1, axis=0)
        if w > 1:
            # Uniform moving-average filter of size w.
            filter_ = np.concatenate([np.ones(w) * 1 / w])
            lag_array = np.roll(data, w - 1, axis=0)
            conv_array = convolute_1D(lag_array, filter_)

        for lag in lags:
            lag_array = conv_array
            if lag != 0:
                lag_array = np.roll(conv_array, lag, axis=0)
            for deriv in derivs:
                deriv_array = lag_array
                if deriv != 0:
                    # Prepend the first row so np.diff keeps the original
                    # number of time steps.
                    deriv_array = np.diff(
                        lag_array, deriv, prepend=np.ones((1, dim[1])) * data[0], axis=0
                    )
                features.append(deriv_array)

    # Stack all (window, lag, deriv) combinations to (n_combi, n_steps, n_dim),
    # then interleave the dims into columns of a (n_steps, n_combi*n_dim) array.
    deriv_features = np.stack(features).reshape(-1, dim[0], dim[1])
    deriv_features = np.swapaxes(deriv_features, 2, 1).reshape(-1, dim[0]).T
    return deriv_features, params
def fit_compute_lag(
    data,
    context=None,
    lag=[0],
    delay=0,
    ind_data=None,
    ind_context=None,
    params=None,
    **kwargs
):
    """Create lag features from a numerical array.

    Args:
        data (float array): target to extract lag-features from.
        context (optional): contextual data, forwarded to the selection step.
        lag (list of int, optional): lags to build. Defaults to [0].
        delay (int, optional): delay added before each lag. Defaults to 0.
        ind_data / ind_context (optional): column selectors.
        params: unused, returned as-is (stateless step).

    Returns:
        (features, params): features of shape (n_steps, n_lags * n_dim).
    """
    data = select_data_and_context(
        data=data, context=context, ind_data=ind_data, ind_context=ind_context, **kwargs
    )

    dim = data.shape
    new_features_list = []
    for i in np.array(lag) + delay:
        Y_cur = np.roll(data, i, axis=0)
        if i > 0:
            # the wrapped-around head carries end-of-series values: zero it
            Y_cur[0:i] = 0
        for g in range(dim[1]):
            new_features_list.append(Y_cur[:, g])
    # Dead code removed: a parallel list of feature names was built and
    # immediately discarded in the original implementation.
    return np.array(new_features_list).T, params
def mask_corr_feature_target(X, y, v_seuil=0.05):
    """Boolean mask of features whose absolute correlation with at least
    one target column exceeds v_seuil."""
    best_corr = np.abs(corr_matrix_array(X, y[:, 0]))
    for col in range(1, y.shape[1]):
        best_corr = np.maximum(best_corr, np.abs(corr_matrix_array(X, y[:, col])))

    return best_corr > v_seuil
def select_features_from_FI(X, y, model="RF", threesold=0.01, **kwargs):
    """Select features whose model feature-importance exceeds a threshold.

    Args:
        X (ndarray): features (n_samples, n_features).
        y (ndarray): targets.
        model (str, optional): importance estimator; only "RF" is supported.
        threesold (float, optional): importance threshold. Defaults to 0.01.

    Returns:
        ndarray of bool: mask of features to keep.

    Raises:
        ValueError: if model is not supported (previously an unknown model
            crashed later with a NameError on the undefined estimator).
    """
    if model != "RF":
        raise ValueError("Unsupported model for feature selection: " + str(model))

    estimator = RandomForestRegressor(
        ccp_alpha=1e-05,
        max_depth=15,
        max_features=0.5,
        max_samples=0.5,
        min_impurity_decrease=0.0001,
        min_samples_leaf=2,
        min_samples_split=8,
        n_estimators=100,
        random_state=0,
    )
    estimator.fit(X, y)
    features_mask = estimator.feature_importances_ > threesold
    return features_mask
def build_window_representation(y, step=1, window=10):
    """Reshape a (n_steps, n_dim) series into the tsfresh long format.

    Builds one frame per shift in [0, window), each holding the shifted
    series restricted to the step-subsampled time steps, and stacks them.
    When the series length equals the window, only the last window is kept.
    """
    n = len(y)
    if step > 1:
        mask = np.arange(n) % step == step - 1
    else:
        mask = np.ones(n) == 1

    n_kept = len(y[mask])
    frames = []
    for shift in range(window):
        cols = {
            "id": np.arange(n_kept),
            "time": np.roll(np.arange(n_kept), shift),
        }
        for dim in range(y.shape[1]):
            cols["value_" + str(dim + 1)] = np.roll(y[:, dim], shift, axis=0)[mask]
        frames.append(pd.DataFrame(cols))

    df_ts = pd.concat(frames)
    y_target = y[mask]
    if n == window:
        # single-window case: keep only the most recent id and target
        df_ts = df_ts[df_ts["id"] == n - 1]
        y_target = y_target[-1:]

    return (df_ts, y_target)
def fit_tsfresh_feature_engeenering(
    data,
    context=None,
    window=10,
    step=None,
    ts_fresh_params=None,
    ind_data=None,
    ind_context=None,
    **kwargs
):
    """Fit step of tsfresh feature engineering.

    Extracts tsfresh features on a windowed representation of the series,
    filters them (NaN-free, correlated with the targets, tsfresh relevance
    per target dim, feature-importance selection) and returns the
    kind_to_fc_parameters describing the retained features.
    """
    data = select_data_and_context(
        data=data, context=context, ind_data=ind_data, ind_context=ind_context
    )

    if step is None:
        step = window

    df_ts, y_target = build_window_representation(data, step, window)
    if ts_fresh_params is None:
        ts_fresh_params = EfficientFCParameters()

    X_extracted = extract_features(
        df_ts, ts_fresh_params, column_id="id", column_sort="time"
    )

    # drop any column containing NaN
    keep = np.isnan(X_extracted).sum(axis=0) == 0
    X_extracted = X_extracted.loc[:, keep]
    # keep only features correlated with at least one target dimension
    keep = mask_corr_feature_target(X_extracted, y_target)
    X_extracted = X_extracted.loc[:, keep]

    # tsfresh relevance selection, merged over every target dimension
    X_selected = tsfresh.feature_selection.select_features(X_extracted, y_target[:, 0])
    for dim in range(1, data.shape[1]):
        selected_tmp = tsfresh.feature_selection.select_features(
            X_extracted, y_target[:, dim]
        )
        new_cols = [
            col not in X_selected.columns.values.tolist()
            for col in selected_tmp.columns.values
        ]
        X_selected = pd.concat([X_selected, selected_tmp.loc[:, new_cols]], axis=1)

    fi_mask = select_features_from_FI(X_selected.values, y_target)
    selected_names = X_selected.columns[fi_mask].values
    return tsfresh.feature_extraction.settings.from_columns(selected_names)
def compute_tsfresh_feature_engeenering(
    data,
    context=None,
    window=10,
    step=10,
    ind_data=None,
    ind_context=None,
    params_=None,
    **kwargs
):
    """Compute step of tsfresh feature engineering.

    Extracts tsfresh features described by the fitted kind_to_fc_parameters
    on a windowed representation of the series. When params_ is None,
    fit_tsfresh_feature_engeenering is called first.

    Args:
        data: data (n_steps, n_dim).
        context (optional): contextual data.
        window (int): window size of the representation.
        step (int, optional): subsampling step; defaults to window when None.
        ind_data / ind_context (optional): column selectors.
        params_ (dict, optional): fitted kind_to_fc_parameters.

    Returns:
        ((X_extracted, y_target), params_)
    """
    data = select_data_and_context(
        data=data, context=context, ind_data=ind_data, ind_context=ind_context
    )

    if step is None:
        step = window

    if params_ is None:
        # Bugfix: window/step were previously passed positionally and landed
        # in the context/window parameters of the fit function.
        params_ = fit_tsfresh_feature_engeenering(data, window=window, step=step)

    df_ts, y_target = build_window_representation(data, step, window)
    X_extracted = extract_features(
        df_ts,
        kind_to_fc_parameters=params_,
        column_id="id",
        column_sort="time",
        disable_progressbar=True,
    )
    return (X_extracted.values, y_target), params_
def fit_FE_by_estimator(
    data,
    context,
    ind_data=None,
    ind_context=None,
    estimator=None,
    estimator_params=dict(),
    data_lag=[1],
    **kwargs
):
    """Fit a generic feature-engineering estimator on lagged, selected data.

    Args:
        data (pd.DataFrame): data.
        context: contextual data.
        ind_data / ind_context (optional): column selectors.
        estimator (class): estimator class exposing fit/transform.
        estimator_params (dict): constructor parameters for the estimator.
        data_lag: lag applied to data before fitting.

    Returns:
        the fitted estimator instance.
    """
    lagged = pd.DataFrame(np.roll(data.values, data_lag, axis=0), columns=data.columns)
    selection = select_data_and_context(
        data=lagged,
        context=context,
        ind_data=ind_data,
        ind_context=ind_context,
        **kwargs
    )
    fitted = estimator(**estimator_params)
    fitted.fit(selection)
    return fitted
def compute_FE_by_estimator(
    data,
    context,
    ind_data=None,
    ind_context=None,
    estimator=None,
    estimator_params=dict(),
    data_lag=[1],
    params_=None,
    **kwargs
):
    """Compute features with a generic fitted estimator.

    When params_ is None, fit_FE_by_estimator is called first on the raw
    DataFrame (bugfix: the fallback was previously invoked with the
    already-rolled and selected ndarray, which crashed on `.values`, and
    it dropped estimator_params).

    Args:
        data (pd.DataFrame): data.
        context: contextual data.
        ind_data / ind_context (optional): column selectors.
        estimator (class): estimator class exposing fit/transform.
        estimator_params (dict): constructor parameters for the estimator.
        data_lag: lag applied to data before transformation.
        params_ (optional): already-fitted estimator instance.

    Returns:
        (feature, estimator): transformed features and the fitted estimator.
    """
    if params_ is None:
        params_ = fit_FE_by_estimator(
            data,
            context,
            ind_data=ind_data,
            ind_context=ind_context,
            estimator=estimator,
            estimator_params=estimator_params,
            data_lag=data_lag,
            **kwargs
        )
    fitted = params_

    data = pd.DataFrame(np.roll(data.values, data_lag, axis=0), columns=data.columns)
    data = select_data_and_context(
        data=data, context=context, ind_data=ind_data, ind_context=ind_context, **kwargs
    )
    feature = fitted.transform(data)
    return (feature, fitted)
def fit_feature_engeenering(data, context=None, dict_FE_params=dict(), **kwargs):
    """Fit every feature-engineering step described in dict_FE_params.

    Supported keys: "resample_data_params", "resample_context_params",
    "raw_selection", "periods", "MA_derivate", "FE_by_estimator",
    "MV_features", "ctx_features", "ts_fresh", "selection". Each step's
    fitted state is written back into its "params_" entry.

    Args:
        data: data (n_steps, n_dim).
        context (optional): contextual data.
        dict_FE_params (dict): feature-engineering specification; mutated
            in place with the fitted "params_" values.

    Returns:
        dict: dict_FE_params enriched with the fitted "params_" values.
    """
    if "resample_data_params" in dict_FE_params.keys():
        resample_data_params = dict_FE_params["resample_data_params"]
        if resample_data_params["type"] == "upscale":
            data = upscale_series(data, **resample_data_params)
        elif resample_data_params["type"] == "downscale":
            data = downscale_series(data, **resample_data_params)

    if "resample_context_params" in dict_FE_params.keys():
        resample_context_params = dict_FE_params["resample_context_params"]
        # Bugfix: this branch previously tested resample_data_params["type"]
        # (NameError when only context resampling was requested, and the
        # wrong resampling mode otherwise).
        if resample_context_params["type"] == "upscale":
            context = upscale_series(context, **resample_context_params)
        elif resample_context_params["type"] == "downscale":
            context = downscale_series(context, **resample_context_params)

    list_features = []
    if "raw_selection" in dict_FE_params.keys():
        if not isinstance(dict_FE_params["raw_selection"], list):
            dict_FE_params["raw_selection"] = [dict_FE_params["raw_selection"]]
        for dict_params in dict_FE_params["raw_selection"]:
            feature_tmp = select_data_and_context(
                data=data, context=context, **dict_params
            )
            list_features.append(feature_tmp)

    if "periods" in dict_FE_params.keys():
        if not isinstance(dict_FE_params["periods"], list):
            dict_FE_params["periods"] = [dict_FE_params["periods"]]
        for dict_params in dict_FE_params["periods"]:
            feature_tmp, _ = fit_compute_periods(data, context=context, **dict_params)
            dict_params["params_"] = None  # stateless step
            list_features.append(feature_tmp)

    if "MA_derivate" in dict_FE_params.keys():
        if not isinstance(dict_FE_params["MA_derivate"], list):
            dict_FE_params["MA_derivate"] = [dict_FE_params["MA_derivate"]]
        for dict_params in dict_FE_params["MA_derivate"]:
            feature_tmp, _ = fit_compute_MA_derivate(
                data, context=context, **dict_params
            )
            dict_params["params_"] = None  # stateless step
            list_features.append(feature_tmp)
            # Debug print of feature_tmp.shape removed.

    if "FE_by_estimator" in dict_FE_params.keys():
        if not isinstance(dict_FE_params["FE_by_estimator"], list):
            dict_FE_params["FE_by_estimator"] = [dict_FE_params["FE_by_estimator"]]
        for dict_params in dict_FE_params["FE_by_estimator"]:
            params_ = fit_FE_by_estimator(data, context=context, **dict_params)
            dict_params["params_"] = params_
            feature_tmp, _ = compute_FE_by_estimator(data, context, **dict_params)
            list_features.append(feature_tmp)

    if "MV_features" in dict_FE_params.keys():
        if not isinstance(dict_FE_params["MV_features"], list):
            dict_FE_params["MV_features"] = [dict_FE_params["MV_features"]]
        for dict_params in dict_FE_params["MV_features"]:
            params_ = fit_MV_features(data, context=context, **dict_params)
            dict_params["params_"] = params_
            feature_tmp, _ = compute_MV_features(data, context, **dict_params)
            list_features.append(feature_tmp)

    if "ctx_features" in dict_FE_params.keys():
        if not isinstance(dict_FE_params["ctx_features"], list):
            dict_FE_params["ctx_features"] = [dict_FE_params["ctx_features"]]
        for dict_params in dict_FE_params["ctx_features"]:
            params_ = fit_ctx_features(data, context=context, **dict_params)
            dict_params["params_"] = params_
            feature_tmp, _ = compute_ctx_features(data, context, **dict_params)
            list_features.append(feature_tmp)

    if "ts_fresh" in dict_FE_params.keys():
        if not isinstance(dict_FE_params["ts_fresh"], list):
            dict_FE_params["ts_fresh"] = [dict_FE_params["ts_fresh"]]
        for dict_params in dict_FE_params["ts_fresh"]:
            ts_fresh_params_ = fit_tsfresh_feature_engeenering(
                data, context=context, **dict_params
            )
            dict_params["params_"] = ts_fresh_params_
            dict_params["step"] = 1  # compute phase uses every time step
            (feature_tmp, _), _ = compute_tsfresh_feature_engeenering(
                data, context=context, **dict_params
            )
            list_features.append(feature_tmp)

    X = np.concatenate(list_features, axis=1)
    if "selection" in dict_FE_params.keys():
        dict_FE_params["selection"]["params_"] = select_features_from_FI(
            X, data, **dict_FE_params["selection"]
        )
    return dict_FE_params
def compute_feature_engeenering(
    data, context=None, dict_FE_params=dict(), params_=None
):
    """Compute every feature-engineering step described in dict_FE_params.

    When params_ is None, fit_feature_engeenering is called first and its
    enriched specification is used.

    Args:
        data: data (n_steps, n_dim).
        context (optional): contextual data.
        dict_FE_params (dict): feature-engineering specification.
        params_ (dict, optional): fitted specification (output of the fit).

    Returns:
        (X, dict_FE_params): computed features and the fitted specification.
    """
    if "resample_data_params" in dict_FE_params.keys():
        resample_data_params = dict_FE_params["resample_data_params"]
        if resample_data_params["type"] == "upscale":
            data = upscale_series(data, **resample_data_params)
        elif resample_data_params["type"] == "downscale":
            data = downscale_series(data, **resample_data_params)

    if "resample_context_params" in dict_FE_params.keys():
        resample_context_params = dict_FE_params["resample_context_params"]
        # Bugfix: this branch previously tested resample_data_params["type"].
        if resample_context_params["type"] == "upscale":
            context = upscale_series(context, **resample_context_params)
        elif resample_context_params["type"] == "downscale":
            context = downscale_series(context, **resample_context_params)

    if params_ is None:
        params_ = fit_feature_engeenering(data, context, dict_FE_params=dict_FE_params)
    dict_FE_params = params_

    list_features = []
    if "raw_selection" in dict_FE_params.keys():
        for dict_params in dict_FE_params["raw_selection"]:
            list_features.append(
                select_data_and_context(data=data, context=context, **dict_params)
            )

    if "periods" in dict_FE_params.keys():
        for dict_params in dict_FE_params["periods"]:
            feature_tmp, _ = fit_compute_periods(data, context=context, **dict_params)
            list_features.append(feature_tmp)

    if "MA_derivate" in dict_FE_params.keys():
        for dict_params in dict_FE_params["MA_derivate"]:
            feature_tmp, _ = fit_compute_MA_derivate(
                data, context=context, **dict_params
            )
            list_features.append(feature_tmp)

    if "FE_by_estimator" in dict_FE_params.keys():
        # Bugfix: this section was missing, so features fitted by
        # fit_feature_engeenering were absent here and the "selection"
        # mask no longer aligned with the computed columns.
        for dict_params in dict_FE_params["FE_by_estimator"]:
            feature_tmp, _ = compute_FE_by_estimator(data, context, **dict_params)
            list_features.append(feature_tmp)

    if "MV_features" in dict_FE_params.keys():
        for dict_params in dict_FE_params["MV_features"]:
            feature_tmp, _ = compute_MV_features(data, context=context, **dict_params)
            list_features.append(feature_tmp)

    if "ctx_features" in dict_FE_params.keys():
        for dict_params in dict_FE_params["ctx_features"]:
            feature_tmp, _ = compute_ctx_features(data, context=context, **dict_params)
            list_features.append(feature_tmp)

    if "ts_fresh" in dict_FE_params.keys():
        for dict_params in dict_FE_params["ts_fresh"]:
            (feature_tmp, _), _ = compute_tsfresh_feature_engeenering(
                data, context=context, **dict_params
            )
            list_features.append(feature_tmp)

    X = np.concatenate(list_features, axis=1)
    if "selection" in dict_FE_params.keys():
        mask = dict_FE_params["selection"]["params_"]
        X = X[:, mask]

    return X, dict_FE_params
def get_FE_params(delta=None):
    """Provide default parameters for feature engineering.

    Args:
        delta (optional): resample step parameter; when provided, downscale
            resampling configurations are added for data and context.

    Returns:
        dict: feature-engineering specification consumed by
        fit_feature_engeenering / compute_feature_engeenering.
    """
    dict_FE = {
        "raw_selection": {"ind_context": [3]},
        # Bugfix: key renamed from "fit_compute_MA_derivate" to
        # "MA_derivate", the key actually read by
        # fit_/compute_feature_engeenering (the old key was silently ignored).
        "MA_derivate": {
            "windows": [1, 10, 60],
            "lags": [1],
            "derivs": [0, 1, 10],
        },
        "periods": {"ind_context": [0], "periodicities": [10, 100], "freqs": [1, 2]},
        "ts_fresh": {
            "window": 20,
            "step": 5,
            "ts_fresh_params": f_proc.select_tsfresh_params(["mean", "cwt"]),
        },
    }
    if delta is not None:
        dict_FE["resample_data_params"] = {
            "type": "downscale",
            "delta": delta,
            "mode": "mean",
        }
        dict_FE["resample_context_params"] = {
            "type": "downscale",
            "delta": delta,
            "mode": "first",
        }
    return dict_FE
def fit_MV_features(
    data,
    context,
    ind_data=None,
    ind_context=None,
    focus=None,
    n_components=3,
    n_neighboor=4,
    lags=[0],
    derivs=[0],
    windows=[1],
    **kwargs
):
    """Naive FE fit: rank series by correlation to the focus channel and
    fit a scaler + PCA synthesis of the selected series.

    Args:
        data: data array/dataframe.
        context: contextual data.
        ind_data (list, optional): selected data columns; must contain focus.
        ind_context (list, optional): selected context columns.
        focus: member of ind_data used as the correlation reference.
        n_components (int, optional): PCA components (0 disables PCA).
        n_neighboor (int, optional): correlated-neighbour count
            (0 disables the correlation ranking).
        lags / derivs / windows: unused at fit time, kept for API symmetry.

    Returns:
        (order, scaler, estimator): correlation order (or None), fitted
        StandardScaler (or None) and fitted PCA (or None).
    """
    series = select_data(data, context, ind_data=ind_data, ind_context=ind_context)
    ind_focus = ind_data.index(focus)

    order = None
    if n_neighboor > 0:
        print(series.shape)
        # rank the other series by decreasing correlation with the focus one
        correlations = np.corrcoef(series, rowvar=False)
        order = np.argsort(correlations[ind_focus])[::-1][1:]

    scaler, estimator = None, None
    if n_components > 0:
        scaler = StandardScaler()
        estimator = PCA(n_components=n_components)
        series = scaler.fit_transform(series)
        estimator.fit(series)

    return order, scaler, estimator
def compute_MV_features(
    data,
    context,
    ind_data=None,
    ind_context=None,
    focus=None,
    n_components=3,
    n_neighboor=4,
    lags=[0],
    derivs=[0],
    windows=[1],
    params_=None,
    **kwargs
):
    """Naive FE compute: keep the series most correlated with the focus
    channel, add their PCA synthesis, then build MA/derivative features.

    Args:
        data: data array/dataframe.
        context: contextual data.
        ind_data / ind_context (optional): column selectors.
        focus: member of ind_data used as the correlation reference.
        n_components (int, optional): PCA components (0 disables PCA).
        n_neighboor (int, optional): correlated-neighbour count
            (0 disables the neighbour block).
        lags / derivs / windows: parameters of fit_compute_MA_derivate.
        params_ (tuple, optional): (order, scaler, estimator) from
            fit_MV_features; fitted on the fly when None.

    Returns:
        (features, params_)
    """
    if params_ is None:
        params_ = fit_MV_features(
            data,
            context,
            ind_data=ind_data,
            ind_context=ind_context,
            focus=focus,
            n_components=n_components,
        )
    order, scaler, estimator = params_

    series = select_data(data, context, ind_data=ind_data, ind_context=ind_context)

    parts = []
    if n_neighboor > 0:
        # keep the n_neighboor most-correlated series
        parts.append(series[:, order[:n_neighboor]])
    if n_components > 0:
        pca_series = estimator.transform(scaler.transform(series))
        parts.append(pca_series)

    merged = parts[0] if len(parts) == 1 else np.concatenate(parts, axis=1)

    merged, _ = fit_compute_MA_derivate(
        merged, derivs=derivs, lags=lags, windows=windows
    )

    return (merged, params_)
def fit_ctx_features(
    data, context, ind_data=None, ind_context=None, n_components=3, lags=[0], **kwargs
):
    """Fit a contextual-features synthesis: StandardScaler + PCA on the
    selected contextual measures.

    Args:
        data: data array/dataframe.
        context: contextual data.
        ind_data (optional): data columns to include.
        ind_context (optional): context columns to include.
        n_components (int, optional): number of PCA components. Defaults to 3.
        lags (list, optional): unused at fit time, kept for API symmetry.

    Returns:
        (scaler, estimator): fitted StandardScaler and PCA.
    """
    # Bugfix: ind_context was previously given the whole context object
    # instead of the ind_context selector.
    selected_data = select_data(
        data, context, ind_data=ind_data, ind_context=ind_context
    )

    scaler = StandardScaler()
    selected_data = scaler.fit_transform(selected_data)
    estimator = PCA(n_components)
    # fit only: the transformed output of the original fit_transform call
    # was discarded anyway
    estimator.fit(selected_data)
    return scaler, estimator
def compute_ctx_features(
    data,
    context,
    ind_data=None,
    ind_context=None,
    n_components=3,
    lag=0,
    params_=None,
    **kwargs
):
    """Compute contextual features: scale + PCA-project the selected
    contextual measures, then apply a lag.

    Args:
        data: data array/dataframe.
        context: contextual data.
        ind_data (optional): data columns to include.
        ind_context (optional): context columns to include.
        n_components (int, optional): number of PCA components. Defaults to 3.
        lag (int, optional): lag applied to the projected features.
        params_ ((scaler, estimator), optional): fitted transformers from
            fit_ctx_features; fitted on the fly when None.

    Returns:
        (X_ctx, params_)
    """
    if params_ is None:
        params_ = fit_ctx_features(
            data,
            context,
            ind_data=ind_data,
            ind_context=ind_context,
            n_components=n_components,
        )
    scaler, estimator = params_

    # Bugfix: ind_context was previously given the whole context object
    # instead of the ind_context selector.
    selected_data = select_data(
        data, context, ind_data=ind_data, ind_context=ind_context
    )
    selected_data = scaler.transform(selected_data)
    selected_data = estimator.transform(selected_data)

    selected_data, _ = fit_compute_lag(selected_data, lag=[lag])

    return selected_data, params_