Source code for tadkit.base.formalizer

import abc
from typing import Sequence, Union

from tadkit.base.typing import KWParams, ParamsDescription, Array


class Formalizer(abc.ABC):
    """Abstract class of data formalizer (provider).

    Transforms Data from Confiance DataProvider into standard Data for ML
    pipelines.

    Methods:
        formalize: Take a data query and return associated data.
        default_query: Build a query from the ``"default"`` entry of every
            parameter listed in ``query_description``.
        no_data_leakage: Check if no leakage from a first data query to a
            second. NOTE(review): not defined on this base class -- confirm
            whether concrete formalizers are expected to provide it.

    Properties:
        query_description: Get the description of a data query.
        available_properties: Get the properties that the formalized data
            satisfies.

    Example of usage:

        >>> assert issubclass(MyFormalizer, Formalizer)
        >>> formalizer = MyFormalizer(**args_init)
        >>> formalizer.available_properties  # The provided property of the formalized data
        >>> formalizer.query_description  # The description of the queries
        >>> query_train = ...  # Query to create data, following the query description
        >>> query_test = ...
        >>> X_test = formalizer.formalize(**query_test)
        >>> X_train = formalizer.formalize(**query_train)
    """

    @property
    @abc.abstractmethod
    def available_properties(self) -> Sequence[str]:
        """Properties that the formalized data satisfies.

        The base implementation returns an empty list; concrete subclasses
        must override this property.
        """
        return []

    @property
    @abc.abstractmethod
    def query_description(self) -> ParamsDescription:
        """Description of the parameters accepted by a data query.

        The base implementation returns an empty mapping; concrete subclasses
        must override this property. ``default_query`` reads the
        ``"default"`` key of every value in this mapping.
        """
        return {}

    def default_query(self) -> dict:
        """Return the query made of every parameter's default value.

        NB: this hints at queries having a default value for all parameters.

        Returns:
            A dict mapping each parameter name of ``query_description`` to
            the value stored under its ``"default"`` key.

        Raises:
            KeyError: If a parameter description has no ``"default"`` key.
                The message names the offending parameter.
        """
        defaults = {}
        for name, param in self.query_description.items():
            try:
                defaults[name] = param["default"]
            except KeyError as err:
                # Same exception type as a plain lookup, but say which
                # parameter is incomplete instead of just "'default'".
                raise KeyError(
                    f"query parameter {name!r} has no 'default' entry "
                    "in query_description"
                ) from err
        return defaults

    @abc.abstractmethod
    def formalize(self, **query: KWParams) -> Union[Array, Sequence[Array]]:
        """Take a data query and return the associated data.

        Args:
            **query: Query parameters, passed as keyword arguments.

        Returns:
            One array or a sequence of arrays.

        Raises:
            NotImplementedError: Always, in this abstract base
                implementation.
        """
        raise NotImplementedError

    @classmethod
    def __subclasshook__(cls, subclass):
        """Structural (duck-typed) check used by ``issubclass``/``isinstance``.

        A class passes when it exposes a callable ``formalize`` and
        non-callable ``available_properties`` and ``query_description``
        attributes (a ``property`` object looked up on the class is not
        callable).

        Returns:
            ``False`` when the structure does not match, ``True`` when it
            matches and the check is made against ``Formalizer`` itself, and
            ``NotImplemented`` (defer to the regular ABC mechanism) when it
            matches but the check is made against a subclass.
        """
        # None is not callable, so a missing attribute fails this test too.
        has_formalize = callable(getattr(subclass, "formalize", None))
        has_data_attributes = all(
            hasattr(subclass, name) and not callable(getattr(subclass, name))
            for name in ("available_properties", "query_description")
        )
        if not (has_formalize and has_data_attributes):
            return False
        if cls is Formalizer:
            return True
        return NotImplemented