Module pulearn.registry

Registry and contributor scaffolding for PU algorithms.

Functions

def get_algorithm_registry()
Expand source code Browse git
def get_algorithm_registry():
    """Build a fresh dict holding the registry entries, keyed by short name.

    The result is a shallow copy: adding or removing keys on it does not
    affect the module-level registry, but the entry objects are shared.
    """
    snapshot = {}
    snapshot.update(_ALGORITHM_REGISTRY)
    return snapshot

Return a copy of the algorithm registry keyed by short name.

def get_algorithm_spec(key)
Expand source code Browse git
def get_algorithm_spec(key):
    """Look up the registry entry stored under ``key``.

    Raises:
        KeyError: If ``key`` is not registered. The message lists the keys
            reported by ``list_registered_algorithms`` and the original
            lookup error is chained as the cause.
    """
    try:
        entry = _ALGORITHM_REGISTRY[key]
    except KeyError as lookup_error:
        available = ", ".join(list_registered_algorithms())
        message = "Unknown PU algorithm {!r}. Available keys: {}.".format(
            key,
            available,
        )
        raise KeyError(message) from lookup_error
    return entry

Return the registry entry for a known algorithm key.

def get_new_algorithm_checklist()
Expand source code Browse git
def get_new_algorithm_checklist():
    """Give back the checklist contributors follow when adding an algorithm.

    The module-level checklist object itself is returned, not a copy.
    """
    checklist = _NEW_ALGORITHM_CHECKLIST
    return checklist

Return the contributor checklist for adding a new algorithm.

def get_scaffold_templates()
Expand source code Browse git
def get_scaffold_templates():
    """Resolve the scaffold template paths for a repository checkout.

    The project root reported by ``_project_root`` is handed to
    ``_resolve_scaffold_templates`` and its result is returned unchanged.
    """
    root = _project_root()
    return _resolve_scaffold_templates(root)

Return absolute scaffold paths from a repository checkout.

def list_registered_algorithms()
Expand source code Browse git
def list_registered_algorithms():
    """Return the registry keys as a tuple, in the registry's iteration order."""
    return (*_ALGORITHM_REGISTRY,)

Return the registered algorithm keys in deterministic order.

def validate_algorithm_spec(spec)
Expand source code Browse git
def validate_algorithm_spec(spec):
    """Check a registry entry's metadata and raise on the first problem.

    Checks run in a fixed order: key, summary, family, assumption,
    estimator class, shared-contract inheritance, then the four
    reference fields.

    Args:
        spec: Object exposing the ``PUAlgorithmSpec`` attributes.

    Raises:
        ValueError: If any check fails. Apart from the key check, the
            message includes the entry's key.
    """

    def _is_blank(value):
        # Blank means: not a string, or a string with only whitespace.
        return not (isinstance(value, str) and value.strip())

    if _is_blank(spec.key):
        raise ValueError("Registry entries must define a non-empty key.")

    if _is_blank(spec.summary):
        message = "Registry entry {!r} must define a non-empty summary."
        raise ValueError(message.format(spec.key))

    if _is_blank(spec.family):
        message = "Registry entry {!r} must define a non-empty family."
        raise ValueError(message.format(spec.key))

    if spec.assumption not in _ALLOWED_ASSUMPTIONS:
        message = (
            "Registry entry {!r} uses unsupported assumption {!r}. "
            "Expected one of {}."
        )
        raise ValueError(
            message.format(
                spec.key,
                spec.assumption,
                sorted(_ALLOWED_ASSUMPTIONS),
            )
        )

    estimator = spec.estimator_cls
    # The type check must come first: issubclass() rejects non-class input.
    if not isinstance(estimator, type):
        message = (
            "Registry entry {!r} must define estimator_cls as a "
            "class/type."
        )
        raise ValueError(message.format(spec.key))

    if not issubclass(estimator, ClassifierMixin):
        message = "Registry entry {!r} must reference a classifier."
        raise ValueError(message.format(spec.key))

    if spec.uses_base_contract and not issubclass(estimator, BasePUClassifier):
        message = (
            "Registry entry {!r} opts into the shared PU contract but {!r} "
            "does not inherit from BasePUClassifier."
        )
        raise ValueError(message.format(spec.key, spec.estimator_name))

    # All four references are read up front, then checked in this order.
    reference_fields = (
        ("docs_reference", spec.docs_reference),
        ("test_reference", spec.test_reference),
        ("benchmark_reference", spec.benchmark_reference),
        ("contract_reference", spec.contract_reference),
    )
    for name, reference in reference_fields:
        if _is_blank(reference):
            message = "Registry entry {!r} must define {}."
            raise ValueError(message.format(spec.key, name))

Raise an informative error if registry metadata is incomplete.

Classes

class PUAlgorithmSpec (key: str,
estimator_cls: type,
family: str,
assumption: str,
summary: str,
docs_reference: str,
test_reference: str,
benchmark_reference: str,
contract_reference: str,
requires_predict_proba: bool = True,
shared_label_normalization: bool = True,
uses_base_contract: bool = True)
Expand source code Browse git
@dataclass(frozen=True)
class PUAlgorithmSpec:
    """Immutable record of the metadata kept for one registered PU algorithm.

    The three boolean flags default to ``True``; every other field is
    required at construction time.
    """

    # Required descriptive fields.
    key: str
    estimator_cls: type
    family: str
    assumption: str
    summary: str

    # Required reference fields.
    docs_reference: str
    test_reference: str
    benchmark_reference: str
    contract_reference: str

    # Boolean flags, all enabled unless overridden.
    requires_predict_proba: bool = True
    shared_label_normalization: bool = True
    uses_base_contract: bool = True

    @property
    def estimator_name(self):
        """Name of ``estimator_cls``, or its ``repr`` if it is not a class."""
        cls = self.estimator_cls
        return cls.__name__ if isinstance(cls, type) else repr(cls)

Metadata describing a registered PU algorithm.

Instance variables

var assumption : str

The assumption the algorithm relies on. `validate_algorithm_spec` rejects any value outside the set of allowed assumptions.

var benchmark_reference : str

Benchmark reference for the algorithm. `validate_algorithm_spec` requires a non-empty string.

var contract_reference : str

Contract reference for the algorithm. `validate_algorithm_spec` requires a non-empty string.

var docs_reference : str

Documentation reference for the algorithm. `validate_algorithm_spec` requires a non-empty string.

var estimator_cls : type

The estimator class for the algorithm. `validate_algorithm_spec` requires it to be a class that subclasses `ClassifierMixin`.

prop estimator_name
Expand source code Browse git
@property
def estimator_name(self):
    """Name of ``estimator_cls``, or its ``repr`` if it is not a class."""
    cls = self.estimator_cls
    if not isinstance(cls, type):
        return repr(cls)
    return cls.__name__

Return the estimator class name.

var family : str

The algorithm family. `validate_algorithm_spec` requires a non-empty string.

var key : str

The key identifying the registry entry. `validate_algorithm_spec` requires a non-empty string.

var requires_predict_proba : bool

Boolean flag, `True` by default. Judging by its name, it marks whether the algorithm requires `predict_proba`.

var shared_label_normalization : bool

Boolean flag, `True` by default. Judging by its name, it marks whether the algorithm uses the shared label normalization.

var summary : str

Short description of the algorithm. `validate_algorithm_spec` requires a non-empty string.

var test_reference : str

Test reference for the algorithm. `validate_algorithm_spec` requires a non-empty string.

var uses_base_contract : bool

Boolean flag, `True` by default. When set, `validate_algorithm_spec` requires `estimator_cls` to inherit from `BasePUClassifier`.