Coverage for tdaad/persistencediagram_transformer.py: 100%

27 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-13 13:45 +0000

1"""Persistence Diagram Transformers.""" 

2 

3# Author: Martin Royer 

4 

5import numpy as np 

6from operator import itemgetter 

7 

8from gudhi.sklearn.rips_persistence import RipsPersistence 

9from sklearn.preprocessing import FunctionTransformer 

10 

11from tdaad.utils.local_pipeline import LocalPipeline 

12 

13 

def _data_to_similarity(X, filter_nan: bool = True) -> np.ndarray:
    r"""Transform dataframe X into the similarity matrix :math:`1-\mathbf{Corr}(X)`.

    Args:
        X: object exposing a pandas-style ``corr()`` method whose result has
            ``to_numpy()`` (e.g. a ``pandas.DataFrame``), one column per variable.
        filter_nan: if True (default), drop every variable whose column in the
            similarity matrix is entirely NaN, which is what a constant
            variable produces. If False, drop instead every variable whose
            column contains no truthy entry (i.e. is entirely zero).

    Returns:
        The square similarity matrix restricted to the rows and columns of
        the variables that were kept.
    """
    target = 1 - X.corr().to_numpy()
    if filter_nan:
        # A constant variable yields NaN on its whole row/column.
        dropped = np.isnan(target).all(axis=0)
    else:
        # NaN counts as truthy for ``any``, so only all-zero columns are flagged.
        dropped = ~target.any(axis=0)
    kept = ~dropped
    # Select rows and columns in a single indexing step rather than two chained copies.
    return target[np.ix_(kept, kept)]

20 

21 

def wrap_in_list(X):
    """Wrap X in a single-element list.

    Needed because RipsPersistence.transform expects a list of inputs.
    """
    return [X]

25 

26 

27class PersistenceDiagramTransformer(LocalPipeline): 

28 """Persistence Diagram Transformer for point cloud. 

29 

30 For a given point cloud, form a similarity matrix and apply a RipsPersistence procedure 

31 to produce topological descriptors in the form of persistence diagrams. 

32 

33 Read more in the :ref: `User Guide <persistence_diagrams>`. 

34 

35 Parameters: 

36 tda_max_dim : int, default=2 

37 The maximum dimension of the topological feature extraction. 

38 

39 Example 

40 ------- 

41 >>> n_timestamps = 100 

42 >>> n_sensors = 5 

43 >>> timestamps = pd.to_datetime('2024-01-01', utc=True) + pd.Timedelta(1, 'h') * np.arange(n_timestamps) 

44 >>> X = pd.DataFrame(np.random.random(size=(n_timestamps, n_sensors)), index=timestamps) 

45 >>> PersistenceDiagramTransformer().fit_transform(X) 

46 """ 

47 

48 def __init__(self, tda_max_dim=2): 

49 self.tda_max_dim = tda_max_dim 

50 similarity_transformer = FunctionTransformer(func=_data_to_similarity) 

51 similarity_transformer.name = r"1-$\mathbf{Corr}(X)$" 

52 list_encapsulate_transformer = FunctionTransformer(func=wrap_in_list) 

53 list_encapsulate_transformer.name = "" 

54 rips_transformer = RipsPersistence(homology_dimensions=range( 

55 tda_max_dim + 1), input_type='lower distance matrix') 

56 rips_transformer.name = "" 

57 list_popper_transformer = FunctionTransformer(func=itemgetter(0)) 

58 list_popper_transformer.name = "" 

59 

60 steps = [ 

61 ("similarity_step", similarity_transformer), 

62 ("list_encapsulate", list_encapsulate_transformer), 

63 ("rips_step", rips_transformer), 

64 ("list_popper", list_popper_transformer), 

65 ] 

66 super().__init__(steps=steps) 

67 

68 def fit_transform(self, X, y=None, **fit_params): 

69 """Transforms data X into a list of persistence diagrams arranged in order of homology dimension. 

70 

71 Args: 

72 X : {array-like, sparse matrix} of shape (n_timestamps, n_sensors) 

73 Multiple time series to transform, where `n_timestamps` is the number of timestamps 

74 in the series X, and `n_sensors` is the number of sensors. 

75 y : Ignored 

76 Not used, present for API consistency by convention. 

77 

78 **fit_params : Ignored 

79 Not used, present for API consistency. 

80 

81 Nb: this function can be removed, but is here so that returns can be explicited. 

82 

83 Returns: 

84 -------- 

85 by_dim_arrays: list of persistence diagrams [pd_0, pd_1, ...] arranged in order of homology dimension. 

86 a persistence diagram pd_i is a ndarray of shape {n_i, 2} where n_i is the number of homological 

87 features in dimension i found in the similarity matrix of the data. 

88 

89 """ 

90 by_dim_arrays = self.fit(X=X, y=y, **fit_params).transform(X) 

91 return by_dim_arrays