Coverage for tdaad/persistencediagram_transformer.py

1"""Persistence Diagram Transformers."""

3# Author: Martin Royer

5import numpy as np

6from operator import itemgetter

8from gudhi.sklearn.rips_persistence import RipsPersistence

9from sklearn.preprocessing import FunctionTransformer

11from tdaad.utils.local_pipeline import LocalPipeline

def _data_to_similarity(X, filter_nan=True):
    r"""Transform dataframe X into the similarity matrix :math:`1-\mathbf{Corr}(X)`.

    Parameters
    ----------
    X : pandas.DataFrame
        Data whose columns are the variables to correlate. Only ``X.corr()``
        is called on it.
    filter_nan : bool, default=True
        If True, drop every variable whose column in the similarity matrix is
        entirely NaN (this happens when the variable is constant).
        If False, drop every variable whose column has no truthy entry instead
        (``~target.any(axis=0)``).

    Returns
    -------
    numpy.ndarray
        Square matrix ``1 - Corr(X)`` restricted, on both rows and columns,
        to the variables that were kept.
    """
    target = 1 - X.corr().to_numpy()
    if filter_nan:
        # A constant variable has an undefined correlation -> NaN on all rows.
        dropped = np.isnan(target).all(axis=0)
    else:
        dropped = ~target.any(axis=0)
    keep = ~dropped
    # Apply the same mask to rows and columns in a single indexing operation.
    return target[np.ix_(keep, keep)]

def wrap_in_list(X):
    """Wrap ``X`` in a single-element list.

    Wrapper needed because RipsPersistence.transform expects a list.

    Parameters
    ----------
    X : object
        Any value; it is not copied or inspected.

    Returns
    -------
    list
        ``[X]``.
    """
    return [X]

class PersistenceDiagramTransformer(LocalPipeline):
    """Persistence Diagram Transformer for point cloud.

    For a given point cloud, form a similarity matrix and apply a RipsPersistence procedure
    to produce topological descriptors in the form of persistence diagrams.

    Read more in the :ref:`User Guide <persistence_diagrams>`.

    Parameters
    ----------
    tda_max_dim : int, default=2
        The maximum dimension of the topological feature extraction.

    Example
    -------
    >>> import numpy as np
    >>> import pandas as pd
    >>> n_timestamps = 100
    >>> n_sensors = 5
    >>> timestamps = pd.to_datetime('2024-01-01', utc=True) + pd.Timedelta(1, 'h') * np.arange(n_timestamps)
    >>> X = pd.DataFrame(np.random.random(size=(n_timestamps, n_sensors)), index=timestamps)
    >>> PersistenceDiagramTransformer().fit_transform(X)  # doctest: +SKIP
    """

    def __init__(self, tda_max_dim=2):
        self.tda_max_dim = tda_max_dim

        # NOTE(review): the ``name`` attributes below are presumably display
        # labels consumed by LocalPipeline -- confirm against its implementation.

        # Step 1: data -> similarity matrix 1 - Corr(X).
        similarity_transformer = FunctionTransformer(func=_data_to_similarity)
        similarity_transformer.name = r"1-$\mathbf{Corr}(X)$"

        # Step 2: RipsPersistence.transform expects a list, so wrap the matrix.
        list_encapsulate_transformer = FunctionTransformer(func=wrap_in_list)
        list_encapsulate_transformer.name = ""

        # Step 3: Rips persistence in homology dimensions 0..tda_max_dim.
        rips_transformer = RipsPersistence(
            homology_dimensions=range(tda_max_dim + 1),
            input_type='lower distance matrix',
        )
        rips_transformer.name = ""

        # Step 4: undo the list wrapping by taking the single (first) result.
        list_popper_transformer = FunctionTransformer(func=itemgetter(0))
        list_popper_transformer.name = ""

        steps = [
            ("similarity_step", similarity_transformer),
            ("list_encapsulate", list_encapsulate_transformer),
            ("rips_step", rips_transformer),
            ("list_popper", list_popper_transformer),
        ]
        super().__init__(steps=steps)

    def fit_transform(self, X, y=None, **fit_params):
        """Transform data X into a list of persistence diagrams arranged in order of homology dimension.

        Note: this override could be removed; it is kept so that the return
        value can be documented explicitly.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_timestamps, n_sensors)
            Multiple time series to transform, where `n_timestamps` is the number of timestamps
            in the series X, and `n_sensors` is the number of sensors.
            NOTE(review): the similarity step calls ``X.corr()``, so a pandas
            DataFrame appears to be required in practice -- confirm.
        y : Ignored
            Not used, present for API consistency by convention.
        **fit_params : Ignored
            Not used, present for API consistency.

        Returns
        -------
        by_dim_arrays : list of persistence diagrams [pd_0, pd_1, ...] arranged in order of homology dimension.
            A persistence diagram pd_i is a ndarray of shape (n_i, 2) where n_i is the number of homological
            features in dimension i found in the similarity matrix of the data.
        """
        by_dim_arrays = self.fit(X=X, y=y, **fit_params).transform(X)
        return by_dim_arrays

Coverage for tdaad/persistencediagram_transformer.py: 100%

27 statements