Source code for tadkit.base.formalizer
import abc
from typing import Sequence, Union
from tadkit.base.typing import KWParams, ParamsDescription, Array
[docs]
class Formalizer(abc.ABC):
    """Abstract base class for data formalizers (data providers).

    Transforms Data from Confiance DataProvider into standard Data for ML
    pipelines.

    Properties:
        available_properties: Get the properties that the formalized data
            satisfies.
        query_description: Get the description of a data query.

    Methods:
        default_query: Build a query from the ``"default"`` entry of every
            parameter listed in ``query_description``.
        formalize: Take a data query and return associated data.

    Example of usage:
        >>> assert issubclass(MyFormalizer, Formalizer)
        >>> formalizer = MyFormalizer(**args_init)
        >>> formalizer.available_properties  # The provided property of the formalized data
        >>> formalizer.query_description  # The description of the queries
        >>> query_train = ...  # Query to create data, following the query description
        >>> query_test = ...
        >>> X_test = formalizer.formalize(query_test)
        >>> X_train = formalizer.formalize(query_train)
    """

    @property
    @abc.abstractmethod
    def available_properties(self) -> Sequence[str]:
        """Properties that the formalized data satisfies.

        Abstract: concrete subclasses must override it. This base
        implementation returns an empty list.
        """
        return []

    @property
    @abc.abstractmethod
    def query_description(self) -> ParamsDescription:
        """Description of the parameters of a data query.

        Abstract: concrete subclasses must override it. This base
        implementation returns an empty dict.
        """
        return {}

    def default_query(self):
        """Return a query made of the default value of every parameter.

        Iterates over ``query_description`` and reads the ``"default"`` entry
        of each parameter description, so a description lacking that entry
        makes the lookup fail.

        Returns:
            dict mapping each parameter name to its ``"default"`` value.
        """
        # NB: this hints at queries having a default value for all parameters.
        defaults = {}
        for param_name, param_spec in self.query_description.items():
            defaults[param_name] = param_spec["default"]
        return defaults

    @abc.abstractmethod
    def formalize(self, **query: KWParams) -> Union[Array, Sequence[Array]]:
        """Take a data query and return the associated data.

        Abstract: concrete subclasses must override it. This base
        implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    @classmethod
    def __subclasshook__(cls, subclass):
        """Structural subclass check used by ``issubclass``/``isinstance``.

        A class passes the check when it exposes a callable ``formalize``
        attribute and non-callable ``available_properties`` and
        ``query_description`` attributes.

        Returns:
            ``False`` when the structural check fails; ``True`` when it
            succeeds and the hook is invoked on ``Formalizer`` itself;
            ``NotImplemented`` otherwise, which defers to the regular
            subclass machinery.
        """
        # A missing attribute yields None, which is not callable.
        if not callable(getattr(subclass, "formalize", None)):
            return False

        # Both descriptive attributes must exist and must not be callables.
        for attr_name in ("available_properties", "query_description"):
            if not hasattr(subclass, attr_name):
                return False
            if callable(getattr(subclass, attr_name)):
                return False

        return True if cls is Formalizer else NotImplemented