Coverage for tdaad/topological_embedding.py: 100%
37 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-16 16:23 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-16 16:23 +0000
1"""Topological Embedding Transformers."""
3# Author: Martin Royer
5from functools import partial
7from sklearn.base import BaseEstimator, TransformerMixin
8from sklearn.pipeline import Pipeline
9from sklearn.preprocessing import StandardScaler
10from sklearn.preprocessing import FunctionTransformer
11from sklearn.compose import ColumnTransformer
12from sklearn.cluster import KMeans
14from gudhi.representations.vector_methods import Atol
16from tdaad.utils.tda_functions import transform_to_persistence_diagram
17from tdaad.utils.window_functions import sliding_window_ppl_pp
# Handle on the unpatched ``Atol.fit`` that the wrapper below delegates to.
# If this module is executed again (e.g. ``importlib.reload``), ``Atol.fit`` is
# already the wrapper; recover the true original from the marker attribute so
# the wrapper never ends up delegating to itself (infinite recursion).
atol_vanilla_fit = getattr(Atol.fit, "_tdaad_vanilla_fit", Atol.fit)


def local_atol_fit(self, X, y=None, sample_weight=None):
    """Fit Atol after unwrapping pandas containers.

    Local modification to prevent the FutureWarning triggered by
    ``np.concatenate(X)`` when ``X`` is a ``pd.Series``.

    Parameters
    ----------
    X : object
        Input forwarded to the original ``Atol.fit``. When it exposes a
        ``values`` attribute (as pandas objects do), that attribute is
        passed on instead of the container itself.
    y : object, default=None
        Forwarded unchanged to the original ``Atol.fit``.
    sample_weight : object, default=None
        Forwarded unchanged to the original ``Atol.fit`` so that weights
        supplied by the caller are no longer silently dropped.

    Returns
    -------
    object
        Whatever the original ``Atol.fit`` returns.
    """
    if hasattr(X, "values"):
        X = X.values
    return atol_vanilla_fit(self, X, y=y, sample_weight=sample_weight)


# Remember the original on the wrapper itself (see the reload note above),
# then install the wrapper on the class.
local_atol_fit._tdaad_vanilla_fit = atol_vanilla_fit
Atol.fit = local_atol_fit
class TopologicalEmbedding(BaseEstimator, TransformerMixin):
    """Topological embedding for multiple time series.

    Slices time series into smaller time series windows, forms an affinity matrix on each window
    and applies a Rips procedure to produce persistence diagrams for each affinity
    matrix. Then uses Atol [ref:Atol] on each homology dimension (one Atol vectorizer
    per dimension, gathered in the "Archipelago" pipeline step) to produce the
    topological vectorization.

    Read more in the :ref:`User Guide <topological_embedding>`.

    Parameters
    ----------
    window_size : int, default=40
        Size of the sliding window used to extract the subsequences fed to the
        persistence diagram computation.
    step : int, default=5
        Size of the sliding window steps between each window.
    tda_max_dim : int, default=2
        The maximum dimension of the topological feature extraction.
    n_centers_by_dim : int, default=5
        The number of centroids to generate by dimension for vectorizing topological features.
        One Atol vectorizer is built for every dimension in ``0..tda_max_dim``, so the
        resulting embedding has total dimension <= (tda_max_dim + 1) * n_centers_by_dim.
        The resulting embedding dimension might be smaller because of the KMeans algorithm
        in the Archipelago step.

    Attributes
    ----------
    pipeline_ : sklearn.pipeline.Pipeline
        Pipeline built and fitted by :meth:`fit` / :meth:`fit_transform`.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> n_timestamps = 100
    >>> n_sensors = 5
    >>> timestamps = pd.to_datetime('2024-01-01', utc=True) + pd.Timedelta(1, 'h') * np.arange(n_timestamps)
    >>> X = pd.DataFrame(np.random.random(size=(n_timestamps, n_sensors)), index=timestamps)
    >>> TopologicalEmbedding(n_centers_by_dim=2, tda_max_dim=1).fit_transform(X)  # doctest: +SKIP
    """

    def __init__(
        self,
        window_size: int = 40,
        step: int = 5,
        tda_max_dim: int = 2,
        n_centers_by_dim: int = 5,
    ):
        # sklearn convention: only store the hyper-parameters, no validation here.
        self.window_size = window_size
        self.step = step
        self.tda_max_dim = tda_max_dim
        self.n_centers_by_dim = n_centers_by_dim

    def _build_pipeline(self):
        """Build a fresh, unfitted pipeline from the current hyper-parameters.

        Returns
        -------
        sklearn.pipeline.Pipeline
            Standard scaling, then sliding-window persistence diagrams, then one
            Atol vectorizer per dimension, configured to output pandas objects.
        """
        # Per-window diagram computation, with the maximal dimension bound in.
        diagram_func = partial(
            transform_to_persistence_diagram, tda_max_dim=self.tda_max_dim
        )
        sliding_diagrams = FunctionTransformer(
            func=sliding_window_ppl_pp,
            kw_args={
                "window_size": self.window_size,
                "step": self.step,
                "func": diagram_func,
            },
        )
        # One Atol per dimension; the integer selector picks column ``dim`` of
        # the previous step's output (presumably the diagrams of homology
        # dimension ``dim`` -- confirm against sliding_window_ppl_pp).
        archipelago = ColumnTransformer(
            [
                (
                    f"Atol{dim}",
                    Atol(
                        quantiser=KMeans(
                            n_clusters=self.n_centers_by_dim,
                            # fixed seed so the fitted centroids are reproducible
                            random_state=202312,
                            n_init="auto",
                        )
                    ),
                    dim,
                )
                for dim in range(self.tda_max_dim + 1)
            ]
        )
        steps = [
            ("Standard scaler", StandardScaler()),
            ("Sliding persistence diagram transformer", sliding_diagrams),
            ("Archipelago", archipelago),
        ]
        return Pipeline(steps).set_output(transform="pandas")

    def fit(self, X, y=None):
        """
        Fit the internal pipeline to the data.

        Parameters
        ----------
        X : pandas.DataFrame
            Input feature matrix.

        y : array-like, optional
            Target values (not used here, but accepted for compatibility with sklearn).

        Returns
        -------
        self : object
            Fitted transformer.
        """
        # Rebuild on every call so a refit always reflects the current parameters.
        self.pipeline_ = self._build_pipeline()
        self.pipeline_.fit(X, y)
        return self

    def transform(self, X):
        """
        Apply transformations to the input data using the fitted pipeline.

        Parameters
        ----------
        X : pandas.DataFrame
            Input data to transform.

        Returns
        -------
        X_transformed : array-like or DataFrame
            Transformed data.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If the transformer has not been fitted yet. This error subclasses
            both ``AttributeError`` and ``ValueError``.
        """
        # Local import: keeps this block self-contained with respect to the
        # module's import section.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, "pipeline_")
        return self.pipeline_.transform(X)

    def fit_transform(self, X, y=None, **fit_params):
        """
        Fit to data, then transform it.

        The pipeline is fitted and applied in a single pass, so the
        sliding-window persistence diagrams are computed only once instead of
        once for fitting and once more for transforming.

        Parameters
        ----------
        X : pandas.DataFrame
            Input data to fit on and transform.

        y : array-like, optional
            Target values (not used here, but accepted for compatibility with sklearn).

        **fit_params : dict
            Accepted for signature compatibility with sklearn; currently ignored.

        Returns
        -------
        X_transformed : array-like
        """
        self.pipeline_ = self._build_pipeline()
        return self.pipeline_.fit_transform(X, y)