Coverage for tdaad/persistencediagram_transformer.py: 100%
27 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-13 13:45 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-13 13:45 +0000
1"""Persistence Diagram Transformers."""
3# Author: Martin Royer
5import numpy as np
6from operator import itemgetter
8from gudhi.sklearn.rips_persistence import RipsPersistence
9from sklearn.preprocessing import FunctionTransformer
11from tdaad.utils.local_pipeline import LocalPipeline
def _data_to_similarity(X, filter_nan=True):
    r"""Build the matrix :math:`1-\mathbf{Corr}(X)` from dataframe X.

    The pairwise correlation of the columns of ``X`` is obtained through
    ``X.corr()``, subtracted from 1, and then pruned symmetrically: every
    discarded column has its matching row discarded as well.

    Parameters
    ----------
    X : object exposing ``.corr().to_numpy()`` (e.g. a pandas DataFrame)
        Input data, one column per variable.
    filter_nan : bool, default=True
        If True, drop the columns (and matching rows) whose entries are all
        NaN, which is what a constant variable produces.
        If False, drop only the columns that contain no truthy entry, i.e.
        columns made exclusively of zeros (NaN counts as truthy and is kept).

    Returns
    -------
    numpy.ndarray
        Square array restricted to the retained variables.
    """
    dissimilarity = 1 - X.corr().to_numpy()

    if filter_nan:
        # A constant variable yields NaN on its whole row/column.
        discard = np.isnan(dissimilarity).all(axis=0)
    else:
        discard = ~dissimilarity.any(axis=0)

    keep = ~discard
    # Select rows first, then columns, with the same boolean mask.
    kept_rows = dissimilarity[keep, :]
    return kept_rows[:, keep]
def wrap_in_list(X):
    """Return a one-element list holding X.

    Needed because RipsPersistence.transform expects a list as input.
    """
    singleton = [X]
    return singleton
class PersistenceDiagramTransformer(LocalPipeline):
    """Persistence Diagram Transformer for point cloud.

    Chains four steps: turn the data into the matrix ``1 - Corr(X)``, wrap
    that matrix in a list, run a RipsPersistence procedure on it, and unwrap
    the single result. The output is a set of topological descriptors in the
    form of persistence diagrams.

    Read more in the :ref:`User Guide <persistence_diagrams>`.

    Parameters
    ----------
    tda_max_dim : int, default=2
        The maximum dimension of the topological feature extraction;
        homology dimensions ``0`` to ``tda_max_dim`` (inclusive) are requested.

    Example
    -------
    >>> import numpy as np
    >>> import pandas as pd
    >>> n_timestamps = 100
    >>> n_sensors = 5
    >>> timestamps = pd.to_datetime('2024-01-01', utc=True) + pd.Timedelta(1, 'h') * np.arange(n_timestamps)
    >>> X = pd.DataFrame(np.random.random(size=(n_timestamps, n_sensors)), index=timestamps)
    >>> PersistenceDiagramTransformer().fit_transform(X)  # doctest: +SKIP
    """

    def __init__(self, tda_max_dim=2):
        self.tda_max_dim = tda_max_dim

        # Each step carries a ``name`` attribute; only the first one is non-empty.
        to_similarity = FunctionTransformer(func=_data_to_similarity)
        to_similarity.name = r"1-$\mathbf{Corr}(X)$"

        # RipsPersistence works on a list of inputs, hence wrap / unwrap around it.
        to_singleton = FunctionTransformer(func=wrap_in_list)
        to_singleton.name = ""

        rips = RipsPersistence(
            homology_dimensions=range(tda_max_dim + 1),
            input_type='lower distance matrix',
        )
        rips.name = ""

        from_singleton = FunctionTransformer(func=itemgetter(0))
        from_singleton.name = ""

        super().__init__(steps=[
            ("similarity_step", to_similarity),
            ("list_encapsulate", to_singleton),
            ("rips_step", rips),
            ("list_popper", from_singleton),
        ])

    def fit_transform(self, X, y=None, **fit_params):
        """Fit the pipeline on X, then transform X into persistence diagrams.

        This override is not strictly required; it exists so that the return
        value can be documented explicitly.

        Parameters
        ----------
        X : data of shape (n_timestamps, n_sensors)
            Multiple time series to transform, where `n_timestamps` is the
            number of timestamps in the series X, and `n_sensors` is the
            number of sensors. The similarity step calls ``X.corr()``, so X
            must provide that method (e.g. a pandas DataFrame).
        y : Ignored
            Not used, present for API consistency by convention.
        **fit_params : Ignored
            Not used, present for API consistency.

        Returns
        -------
        by_dim_arrays : list of persistence diagrams [pd_0, pd_1, ...]
            Arranged in order of homology dimension. A persistence diagram
            pd_i is a ndarray of shape (n_i, 2) where n_i is the number of
            homological features in dimension i found in the similarity
            matrix of the data.
        """
        fitted = self.fit(X=X, y=y, **fit_params)
        return fitted.transform(X)