Coverage for tdaad/topological_embedding.py: 100%
22 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-13 13:45 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-13 13:45 +0000
1"""Topological Embedding Transformers."""
3# Author: Martin Royer
5from sklearn.preprocessing import StandardScaler, FunctionTransformer
6from sklearn.compose import ColumnTransformer
7from sklearn.cluster import KMeans
9from gudhi.representations.vector_methods import Atol
11from tdaad.persistencediagram_transformer import PersistenceDiagramTransformer
12from tdaad.utils.local_pipeline import LocalPipeline
13from tdaad.utils.window_functions import sliding_window_ppl
# Handle on gudhi's genuine ``Atol.fit`` that the wrapper below delegates to.
# If this module is executed a second time (e.g. ``importlib.reload`` or notebook
# autoreload), ``Atol.fit`` is already our wrapper: recover the genuine method from
# the marker attribute rather than wrapping the wrapper, which would recurse forever.
atol_vanilla_fit = getattr(Atol.fit, "_tdaad_vanilla_fit", Atol.fit)


def local_atol_fit(self, X, y=None, sample_weight=None):
    """Fit ``Atol`` after unwrapping objects that expose a ``values`` attribute.

    Local modification to prevent the FutureWarning triggered by
    ``np.concatenate(X)`` when X is a pd.Series.

    Parameters
    ----------
    X : object
        Input handed to ``Atol.fit``. When it has a ``values`` attribute
        (e.g. a pd.Series), that attribute is passed on instead.
    y : object, default=None
        Forwarded unchanged to the original ``Atol.fit``.
    sample_weight : object, default=None
        Forwarded unchanged to the original ``Atol.fit``.

    Returns
    -------
    Whatever the original ``Atol.fit`` returns.
    """
    if hasattr(X, "values"):
        X = X.values
    # Forward every argument: dropping ``sample_weight`` would silently ignore
    # weights supplied by the caller.
    return atol_vanilla_fit(self, X=X, y=y, sample_weight=sample_weight)


# Marker read by the ``getattr`` above when the module is executed again.
local_atol_fit._tdaad_vanilla_fit = atol_vanilla_fit
Atol.fit = local_atol_fit
class TopologicalEmbedding(LocalPipeline):
    """Topological embedding for multiple time series.

    Slices time series into smaller time series windows, forms an affinity matrix on each
    window and applies a Rips procedure to produce persistence diagrams for each affinity
    matrix. Then uses Atol [ref:Atol] on each homology dimension, gathered in an
    "Archipelago" step (a ``ColumnTransformer`` holding one ``Atol`` per dimension), to
    produce the topological vectorization.

    Read more in the :ref:`User Guide <topological_embedding>`.

    Parameters
    ----------
    window_size : int, default=40
        Size of the sliding window algorithm to extract subsequences as input to named_pipeline.
    step : int, default=5
        Size of the sliding window steps between each window.
    n_centers_by_dim : int, default=5
        The number of centroids to generate by dimension for vectorizing topological features.
        One Atol is built for every dimension in ``0..tda_max_dim``, so the resulting
        embedding has total dimension <= (tda_max_dim + 1) * n_centers_by_dim.
        The resulting embedding dimension might be smaller because of the KMeans algorithm
        in the Archipelago step.
    tda_max_dim : int, default=2
        The maximum dimension of the topological feature extraction.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> n_timestamps = 100
    >>> n_sensors = 5
    >>> timestamps = pd.to_datetime('2024-01-01', utc=True) + pd.Timedelta(1, 'h') * np.arange(n_timestamps)
    >>> X = pd.DataFrame(np.random.random(size=(n_timestamps, n_sensors)), index=timestamps)
    >>> TopologicalEmbedding(n_centers_by_dim=2, tda_max_dim=1).fit_transform(X)
    """

    def __init__(
        self,
        window_size: int = 40,
        step: int = 5,
        tda_max_dim: int = 2,
        n_centers_by_dim: int = 5,
    ):
        # Store the hyper-parameters under their own names.
        self.window_size = window_size
        self.step = step
        self.tda_max_dim = tda_max_dim
        self.n_centers_by_dim = n_centers_by_dim

        # Pipeline run on every window by the sliding-window function.
        diagram_pipeline = PersistenceDiagramTransformer(tda_max_dim=self.tda_max_dim)

        # Step 1: standardise the raw input.
        scaling_step = ("StandardScaler", StandardScaler())

        # Step 2: call sliding_window_ppl with the window settings and the per-window pipeline.
        sliding_kwargs = {
            "window_size": self.window_size,
            "step": self.step,
            "pipeline": diagram_pipeline,
        }
        sliding_step = (
            "SlidingPersistenceDiagramTransformer",
            FunctionTransformer(func=sliding_window_ppl, kw_args=sliding_kwargs),
        )

        # Step 3: one Atol (KMeans quantiser with a fixed seed) per column index
        # 0..tda_max_dim of the previous step's output.
        atol_by_dim = []
        for dim in range(self.tda_max_dim + 1):
            quantiser = KMeans(n_clusters=self.n_centers_by_dim, random_state=202312, n_init="auto")
            atol_by_dim.append((f"Atol{dim}", Atol(quantiser=quantiser), dim))
        vectorizing_step = ("Archipelago", ColumnTransformer(atol_by_dim))

        super().__init__(steps=[scaling_step, sliding_step, vectorizing_step])
        # Request pandas output from transform.
        super().set_output(transform="pandas")