Coverage for tdaad/topological

1"""Topological Embedding Transformers."""

3# Author: Martin Royer

5from sklearn.preprocessing import StandardScaler, FunctionTransformer

6from sklearn.compose import ColumnTransformer

7from sklearn.cluster import KMeans

9from gudhi.representations.vector_methods import Atol

11from tdaad.persistencediagram_transformer import PersistenceDiagramTransformer

12from tdaad.utils.local_pipeline import LocalPipeline

13from tdaad.utils.window_functions import sliding_window_ppl

16atol_vanilla_fit = Atol.fit

19def local_atol_fit(self, X, y=None, sample_weight=None):

20 """ local modification to prevent FutureWarning triggered by np.concatenate(X) when X is a pd.Series."""

21 if hasattr(X, "values"):

22 X = X.values

23 return atol_vanilla_fit(self=self, X=X)

26Atol.fit = local_atol_fit

29class TopologicalEmbedding(LocalPipeline):

30 """Topological embedding for multiple time series.

32 Slices time series into smaller time series windows, forms an affinity matrix on each window

33 and applies a Rips procedure to produce persistence diagrams for each affinity

34 matrix. Then uses Atol [ref:Atol] on each dimension through the

35 gudhi.representation.Archipelago representation to produce topological vectorization.

37 Read more in the :ref:`User Guide <topological_embedding>`.

39 Parameters

40 ----------

41 window_size : int, default=40

42 Size of the sliding window algorithm to extract subsequences as input to named_pipeline.

43 step : int, default=5

44 Size of the sliding window steps between each window.

45 n_centers_by_dim : int, default=5

46 The number of centroids to generate by dimension for vectorizing topological features.

47 The resulting embedding will have total dimension =< tda_max_dim * n_centers_by_dim.

48 The resulting embedding dimension might be smaller because of the KMeans algorithm in the Archipelago step.

49 tda_max_dim : int, default=2

50 The maximum dimension of the topological feature extraction.

52 Examples

53 ----------

54 >>> n_timestamps = 100

55 >>> n_sensors = 5

56 >>> timestamps = pd.to_datetime('2024-01-01', utc=True) + pd.Timedelta(1, 'h') * np.arange(n_timestamps)

57 >>> X = pd.DataFrame(np.random.random(size=(n_timestamps, n_sensors)), index=timestamps)

58 >>> TopologicalEmbedding(n_centers_by_dim=2, tda_max_dim=1).fit_transform(X)

59 """

61 def __init__(

62 self,

63 window_size: int = 40,

64 step: int = 5,

65 tda_max_dim: int = 2,

66 n_centers_by_dim: int = 5,

67 ):

68 self.window_size = window_size

69 self.step = step

70 self.tda_max_dim = tda_max_dim

71 self.n_centers_by_dim = n_centers_by_dim

72 named_ppl = PersistenceDiagramTransformer(

73 tda_max_dim=self.tda_max_dim,

74 )

75 super().__init__(steps=[

76 ("StandardScaler",

77 StandardScaler()

78 ),

79 ("SlidingPersistenceDiagramTransformer",

80 FunctionTransformer(func=sliding_window_ppl, kw_args={

81 "window_size": self.window_size, "step": self.step, "pipeline": named_ppl})

82 ),

83 ("Archipelago",

84 ColumnTransformer(

85 [(f"Atol{i}",

86 Atol(quantiser=KMeans(n_clusters=self.n_centers_by_dim, random_state=202312, n_init="auto")), i)

87 for i in range(self.tda_max_dim + 1)])

88 ),

89 ])

90 super().set_output(transform="pandas")

Coverage for tdaad/topological_embedding.py: 100%

22 statements