Coverage for tadkit/catalog/learners/_sklearn_learners.py: 69%

35 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-04 15:09 +0000

1import numpy as np 

2 

3from sklearn.neighbors import KernelDensity 

4from sklearn.ensemble import IsolationForest 

5from sklearn.preprocessing import QuantileTransformer, StandardScaler 

6from sklearn.pipeline import Pipeline 

7 

8 

# Publish scikit-learn's IsolationForest under the learner name. This is an
# alias, not a subclass: the attributes set below land on IsolationForest itself.
IsolationForestLearner = IsolationForest
IsolationForestLearner.required_properties = []
# Description of the tunable parameters: a single integer range for
# ``n_estimators``, whose description embeds scikit-learn's own constraint text.
IsolationForestLearner.params_description = dict(
    n_estimators=dict(
        description="".join(
            (
                "The number of base estimators in the ensemble",
                ":",
                str(IsolationForestLearner._parameter_constraints["n_estimators"][0]),
            )
        ),
        value_type="range",
        start=1,
        stop=1000,
        step=10,
        default=10,
    )
)

23 

# Publish scikit-learn's KernelDensity under the learner name. This is an
# alias, not a subclass: the attributes set below land on KernelDensity itself.
KernelDensityLearner = KernelDensity
KernelDensityLearner.required_properties = []
# Description of the tunable parameters: a single choice among the kernel
# names accepted by scikit-learn's own parameter constraint.
KernelDensityLearner.params_description = {
    "kernel": {
        "description": str(KernelDensity._parameter_constraints["kernel"][0]),
        "value_type": "choice",
        # Sorted so that the list of choices comes out in the same order on
        # every run, whatever the iteration order of ``options`` is.
        "set": sorted(KernelDensity._parameter_constraints["kernel"][0].options),
        "default": "gaussian",
    }
}

34 

# Keep a handle on scikit-learn's original ``fit`` before this module replaces
# it further down. Save it only once: if this module body runs again after the
# replacement (e.g. importlib.reload), ``KernelDensity.fit`` is already the
# replacement, and storing it as ``oldfit`` would make ``fit`` call itself
# forever.
if "oldfit" not in vars(KernelDensity):
    KernelDensity.oldfit = KernelDensity.fit

36 

37 

def fit(self, X, y=None, sample_weight=None):
    """Fit the estimator, then derive the decision threshold ``offset_``.

    The actual fitting is delegated to ``self.oldfit``. Afterwards the
    training data is scored with ``self.score_samples`` and ``offset_`` is set
    to the percentile of those scores matching a fixed contamination of 0.1.

    Parameters
    ----------
    X : training data, forwarded to ``oldfit`` and ``score_samples``.
    y : forwarded unchanged to ``oldfit``.
    sample_weight : forwarded unchanged to ``oldfit``.

    Returns
    -------
    self
    """
    # Forward sample_weight as well: it used to be accepted but silently dropped.
    self.oldfit(X=X, y=y, sample_weight=sample_weight)
    # Fixed share of the training samples that should fall below the threshold.
    contamination = 0.1
    self.offset_ = np.percentile(self.score_samples(X), 100.0 * contamination)
    return self

43 

44 

# Install the threshold-computing ``fit`` defined above on the learner class.
setattr(KernelDensityLearner, "fit", fit)

46 

47 

def predict(self, X):
    """Label each sample of ``X`` as inlier (1) or outlier (-1).

    A sample is an outlier when its ``score_samples`` value lies strictly
    below the ``offset_`` threshold; every other sample is an inlier.

    Returns an integer array with one label per score.
    """
    margin = self.score_samples(X) - self.offset_
    # Negative margin -> outlier; zero or positive margin -> inlier.
    return np.where(margin < 0, -1, 1).astype(int)

53 

54 

# Install the inlier/outlier ``predict`` defined above on the learner class.
setattr(KernelDensityLearner, "predict", predict)

56 

57 

class ScaledKernelDensityLearner(Pipeline):
    """Two-step pipeline: a feature scaler followed by a KernelDensity estimator.

    The scaler is selected by name through the ``scaling`` argument:
    ``"standard"`` builds a ``StandardScaler`` and ``"quantile_normal"`` builds
    a ``QuantileTransformer`` with a normal output distribution.
    """

    required_properties = []
    params_description = dict(
        scaling={
            "description": "Scaling method",
            "family": "scaling",
            "value_type": "choice",
            "set": ["quantile_normal", "standard"],
        }
    )

    @staticmethod
    def _build_scaler(scaling):
        """Return a new scaler for ``scaling``; raise ValueError for unknown names."""
        if scaling == "standard":
            return StandardScaler()
        if scaling == "quantile_normal":
            return QuantileTransformer(output_distribution="normal")
        raise ValueError("Unavailable scaling")

    def __init__(self, scaling="standard"):
        # The attribute is stored before validation, so it is set even when
        # the name turns out to be unknown.
        self.scaling = scaling
        steps = [
            ("scaler", self._build_scaler(scaling)),
            ("learner", KernelDensity()),
        ]
        super().__init__(steps)