Coverage for tdaad/utils/tda_functions.py: 100%
11 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-16 16:23 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-16 16:23 +0000
1"""Persistence Diagram Transformers."""
3# Author: Martin Royer
5import numpy as np
7from gudhi.sklearn.rips_persistence import RipsPersistence
10def _numpy_data_to_similarity(X, filter_nan=True):
11 r"""Transforms numpy matrix X into similarity matrix :math:`1-\mathbf{Corr}(X)`."""
12 target = 1 - np.corrcoef(X, rowvar=False)
13 # this filters when a variable is constant -> nan on all rows
14 nanrowcols = np.isnan(target).all(axis=0) if filter_nan else ~target.any(axis=0)
15 return target[~nanrowcols][:, ~nanrowcols]
def transform_to_persistence_diagram(X, tda_max_dim=0):
    """Compute Rips persistence diagrams for a point cloud.

    The data is first converted to a ``1 - correlation`` matrix by
    ``_numpy_data_to_similarity``; that matrix is then handed to gudhi's
    ``RipsPersistence`` as a distance matrix to produce topological
    descriptors in the form of persistence diagrams.

    Read more in the :ref:`User Guide <persistence_diagrams>`.

    Parameters
    ----------
    X : array-like
        Input data, forwarded unchanged to ``_numpy_data_to_similarity``
        (which treats columns as variables and rows as observations).
    tda_max_dim : int, default=0
        The maximum dimension of the topological feature extraction;
        homology dimensions ``0`` through ``tda_max_dim`` are requested.

    Returns
    -------
    object
        The first (and only) element of the ``RipsPersistence.transform``
        output for the single input matrix. NOTE(review): presumably a
        list of persistence-diagram arrays, one per homology dimension --
        confirm against the gudhi documentation.

    Example
    -------
    >>> import numpy as np
    >>> n_timestamps = 100
    >>> n_sensors = 5
    >>> X = np.random.random(size=(n_timestamps, n_sensors))
    >>> diagrams = transform_to_persistence_diagram(X, tda_max_dim=1)
    """
    # transform() is given a collection of inputs, so the single matrix is
    # wrapped in a one-element list and the single result unwrapped below.
    distance_batch = [_numpy_data_to_similarity(X)]
    homology_dims = range(tda_max_dim + 1)
    rips = RipsPersistence(
        input_type="lower distance matrix",
        homology_dimensions=homology_dims,
    )
    return rips.transform(distance_batch)[0]