Module pulearn.elkanoto

Both PU classification methods from the Elkan & Noto paper.

Expand source code Browse git
"""Both PU classification methods from the Elkan & Noto paper."""

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError

class ElkanotoPuClassifier(BaseEstimator, ClassifierMixin):
    """Positive-unlabeled classifier using the unweighted Elkan & Noto method.

    Parameters
    ----------
    estimator : sklearn.BaseEstimator
        Any sklearn-compliant estimator object implementing the fit() and
        predict_proba() methods.
    hold_out_ratio : float, default 0.1
        The fraction of the positive training examples to set aside to
        estimate the probability of an example to be positive.
    """

    def __init__(self, estimator, hold_out_ratio=0.1):
        self.estimator = estimator
        # c is the constant proba that an example is positive; it stays at
        # 1.0 until fit() replaces it with the hold-out estimate
        self.c = 1.0
        self.hold_out_ratio = hold_out_ratio
        self.estimator_fitted = False

    def __str__(self):
        return 'Estimator: {}\np(s=1|y=1,x) ~= {}\nFitted: {}'.format(
            self.estimator,
            self.c,
            self.estimator_fitted,
        )

    def fit(self, X, y):
        """Fit the inner estimator and estimate the constant ``c``.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            The training input samples.
        y : array-like, shape = [n_samples]
            The target values. Entries equal to 1 are treated as positives.

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        ValueError
            If there are not more positive examples than the hold-out size.
        """
        # Accept plain lists/tuples: both the ``y == 1.0`` mask and the
        # fancy indexing below need real arrays.
        X = np.asarray(X)
        y = np.asarray(y)
        positives = np.where(y == 1.0)[0]
        hold_out_size = int(np.ceil(len(positives) * self.hold_out_ratio))
        # check for the required number of positive examples
        if len(positives) <= hold_out_size:
            raise ValueError(
                'Not enough positive examples to estimate p(s=1|y=1,x).'
                ' Need at least {}.'.format(hold_out_size + 1)
            )
        # construct the holdout set from the first positives and remove it
        # from the training data
        hold_out = positives[:hold_out_size]
        X_hold_out = X[hold_out]
        X = np.delete(X, hold_out, 0)
        y = np.delete(y, hold_out)
        # fit the inner estimator
        self.estimator.fit(X, y)
        # c is the mean positive-class score over the held-out positives
        hold_out_predictions = self.estimator.predict_proba(X_hold_out)[:, 1]
        self.c = np.mean(hold_out_predictions)
        self.estimator_fitted = True
        return self

    def predict_proba(self, X):
        """Predict the probability of the positive class for X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples]
            The inner estimator's second-column probability divided by
            ``c``. The result is not clipped, so values may exceed 1.

        Raises
        ------
        NotFittedError
            If fit() has not been called.
        """
        if not self.estimator_fitted:
            raise NotFittedError(
                'The estimator must be fitted before calling predict_proba().'
            )
        positive_scores = self.estimator.predict_proba(X)[:, 1]
        return positive_scores / self.c

    def predict(self, X, threshold=0.5):
        """Predict labels.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.
        threshold : float, default 0.5
            The decision threshold over probability to warrant a
            positive label.

        Returns
        -------
        y : array of float of shape = [n_samples]
            1.0 where the predicted probability is strictly above
            ``threshold``, -1.0 otherwise.

        Raises
        ------
        NotFittedError
            If fit() has not been called.
        """
        if not self.estimator_fitted:
            raise NotFittedError(
                'The estimator must be fitted before calling predict(...).'
            )
        scores = np.asarray(self.predict_proba(X))
        return np.where(scores > threshold, 1.0, -1.0)

class WeightedElkanotoPuClassifier(BaseEstimator, ClassifierMixin):
    """Positive-unlabeled classifier using the weighted Elkan & Noto method.

    See the original Elkan & Noto paper for details on how the `labeled` and
    `unlabeled` quantities are used to weigh training examples and affect the
    learning process.

    Parameters
    ----------
    estimator : sklearn.BaseEstimator
        Any sklearn-compliant estimator object implementing the fit() and
        predict_proba() methods.
    labeled : int
        The cardinality to attribute to the labeled training set.
    unlabeled : int
        The cardinality to attribute to the unlabeled training set.
    hold_out_ratio : float, default 0.1
        The fraction of the positive training examples to set aside to
        estimate the probability of an example to be positive.
    """

    def __init__(self, estimator, labeled, unlabeled, hold_out_ratio=0.1):
        self.estimator = estimator
        # c stays at 1.0 until fit() replaces it with the hold-out estimate
        self.c = 1.0
        self.hold_out_ratio = hold_out_ratio
        self.labeled = labeled
        self.unlabeled = unlabeled
        self.estimator_fitted = False

    def __str__(self):
        return 'Estimator: {}\np(s=1|y=1,x) ~= {}\nFitted: {}'.format(
            self.estimator,
            self.c,
            self.estimator_fitted,
        )

    def fit(self, X, y):
        """Fit the inner estimator and estimate the constant ``c``.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            The training input samples.
        y : array-like, shape = [n_samples]
            The target values. Entries equal to 1 are treated as positives.

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        ValueError
            If there are not more positive examples than the hold-out size.
        """
        # Accept plain lists/tuples: both the ``y == 1.0`` mask and the
        # fancy indexing below need real arrays.
        X = np.asarray(X)
        y = np.asarray(y)
        positives = np.where(y == 1.0)[0]
        hold_out_size = int(np.ceil(len(positives) * self.hold_out_ratio))
        # check for the required number of positive examples
        if len(positives) <= hold_out_size:
            raise ValueError(
                'Not enough positive examples to estimate p(s=1|y=1,x).'
                ' Need at least {}.'.format(hold_out_size + 1)
            )
        # construct the holdout set from the first positives and remove it
        # from the training data
        hold_out = positives[:hold_out_size]
        X_hold_out = X[hold_out]
        X = np.delete(X, hold_out, 0)
        y = np.delete(y, hold_out)
        # fit the inner estimator
        self.estimator.fit(X, y)
        # c is the mean positive-class score over the held-out positives
        hold_out_predictions = self.estimator.predict_proba(X_hold_out)[:, 1]
        self.c = np.mean(hold_out_predictions)
        self.estimator_fitted = True
        return self

    def _estimateEy(self, G):
        """Return the estimate of E[y], i.e. P(y=1).

        ``G`` is the two-column output of the inner estimator's
        predict_proba(). Its second column is clamped in place (exact 1.0
        becomes 0.999) so the odds ``G / (1 - G)`` stay finite.
        """
        n = self.labeled
        m = self.labeled + self.unlabeled
        G = G[:, 1]
        # G is a view on the caller's array, so this clamp is visible to the
        # caller as well; predict_proba() slices the column after this call.
        np.place(G, G == 1.0, 0.999)
        W = (G / (1 - G)) * ((1 - self.c) / self.c)
        return (float(n) + float(W.sum())) / float(m)

    def predict_proba(self, X):
        """Predict the probability of the positive class for X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples]
            ``P(s=1|x) * c * E[y] * m / n`` with ``n = labeled`` and
            ``m = labeled + unlabeled``. E[y] is estimated from the scores
            of this same ``X``, so the output depends on the whole batch.

        Raises
        ------
        NotFittedError
            If fit() has not been called.
        """
        if not self.estimator_fitted:
            raise NotFittedError(
                'The estimator must be fitted before calling predict_proba().'
            )
        n = self.labeled
        m = self.labeled + self.unlabeled
        # self.estimator.predict_proba gives the probability of P(s=1|x)
        # for x belongs to P or U
        probabilistic_predictions = self.estimator.predict_proba(X)
        # must run before the column is sliced: it clamps scores in place
        yEstimate = self._estimateEy(probabilistic_predictions)
        probabilistic_predictions = probabilistic_predictions[:, 1]
        numerator = probabilistic_predictions * (self.c * yEstimate * m)
        return numerator / float(n)

    def predict(self, X, treshold=0.5, threshold=None):
        """Predict labels.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.
        treshold : float, default 0.5
            The decision threshold over probability to warrant a
            positive label. Misspelled name kept for backward compatibility.
        threshold : float, optional
            Correctly spelled alias; when given it takes precedence over
            ``treshold``.

        Returns
        -------
        y : array of float of shape = [n_samples]
            1.0 where the predicted probability is strictly above the
            threshold, -1.0 otherwise.

        Raises
        ------
        NotFittedError
            If fit() has not been called.
        """
        if not self.estimator_fitted:
            raise NotFittedError(
                'The estimator must be fitted before calling predict().'
            )
        if threshold is None:
            threshold = treshold
        scores = np.asarray(self.predict_proba(X))
        return np.where(scores > threshold, 1.0, -1.0)


class ElkanotoPuClassifier (estimator, hold_out_ratio=0.1)

Positive-unlabeled classifier using the unweighted Elkan & Noto method.


estimator : sklearn.BaseEstimator
Any sklearn-compliant estimator object implementing the fit() and predict_proba() methods.
hold_out_ratio : float, default 0.1

The fraction of the positive training examples to set aside to estimate the probability of an example to be positive.

Expand source code Browse git
class ElkanotoPuClassifier(BaseEstimator, ClassifierMixin):
    """Positive-unlabeled classifier using the unweighted Elkan & Noto method.

    Parameters
    ----------
    estimator : sklearn.BaseEstimator
        Any sklearn-compliant estimator object implementing the fit() and
        predict_proba() methods.
    hold_out_ratio : float, default 0.1
        The fraction of the positive training examples to set aside to
        estimate the probability of an example to be positive.
    """

    def __init__(self, estimator, hold_out_ratio=0.1):
        self.estimator = estimator
        # c is the constant proba that an example is positive; it stays at
        # 1.0 until fit() replaces it with the hold-out estimate
        self.c = 1.0
        self.hold_out_ratio = hold_out_ratio
        self.estimator_fitted = False

    def __str__(self):
        return 'Estimator: {}\np(s=1|y=1,x) ~= {}\nFitted: {}'.format(
            self.estimator,
            self.c,
            self.estimator_fitted,
        )

    def fit(self, X, y):
        """Fit the inner estimator and estimate the constant ``c``.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            The training input samples.
        y : array-like, shape = [n_samples]
            The target values. Entries equal to 1 are treated as positives.

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        ValueError
            If there are not more positive examples than the hold-out size.
        """
        # Accept plain lists/tuples: both the ``y == 1.0`` mask and the
        # fancy indexing below need real arrays.
        X = np.asarray(X)
        y = np.asarray(y)
        positives = np.where(y == 1.0)[0]
        hold_out_size = int(np.ceil(len(positives) * self.hold_out_ratio))
        # check for the required number of positive examples
        if len(positives) <= hold_out_size:
            raise ValueError(
                'Not enough positive examples to estimate p(s=1|y=1,x).'
                ' Need at least {}.'.format(hold_out_size + 1)
            )
        # construct the holdout set from the first positives and remove it
        # from the training data
        hold_out = positives[:hold_out_size]
        X_hold_out = X[hold_out]
        X = np.delete(X, hold_out, 0)
        y = np.delete(y, hold_out)
        # fit the inner estimator
        self.estimator.fit(X, y)
        # c is the mean positive-class score over the held-out positives
        hold_out_predictions = self.estimator.predict_proba(X_hold_out)[:, 1]
        self.c = np.mean(hold_out_predictions)
        self.estimator_fitted = True
        return self

    def predict_proba(self, X):
        """Predict the probability of the positive class for X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples]
            The inner estimator's second-column probability divided by
            ``c``. The result is not clipped, so values may exceed 1.

        Raises
        ------
        NotFittedError
            If fit() has not been called.
        """
        if not self.estimator_fitted:
            raise NotFittedError(
                'The estimator must be fitted before calling predict_proba().'
            )
        positive_scores = self.estimator.predict_proba(X)[:, 1]
        return positive_scores / self.c

    def predict(self, X, threshold=0.5):
        """Predict labels.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.
        threshold : float, default 0.5
            The decision threshold over probability to warrant a
            positive label.

        Returns
        -------
        y : array of float of shape = [n_samples]
            1.0 where the predicted probability is strictly above
            ``threshold``, -1.0 otherwise.

        Raises
        ------
        NotFittedError
            If fit() has not been called.
        """
        if not self.estimator_fitted:
            raise NotFittedError(
                'The estimator must be fitted before calling predict(...).'
            )
        scores = np.asarray(self.predict_proba(X))
        return np.where(scores > threshold, 1.0, -1.0)


  • sklearn.base.BaseEstimator
  • sklearn.base.ClassifierMixin


def fit(self, X, y)

Fits the classifier


X : array-like, shape = [n_samples, n_features]
The training input samples.
y : array-like, shape = [n_samples]
The target values. An array of int.


self : object
Returns self.
Expand source code Browse git
def fit(self, X, y):
    """Fit the inner estimator and estimate the constant ``c``.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        The training input samples.
    y : array-like, shape = [n_samples]
        The target values. Entries equal to 1 are treated as positives.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If there are not more positive examples than the hold-out size.
    """
    # Accept plain lists/tuples: both the ``y == 1.0`` mask and the fancy
    # indexing below need real arrays.
    X = np.asarray(X)
    y = np.asarray(y)
    positives = np.where(y == 1.0)[0]
    hold_out_size = int(np.ceil(len(positives) * self.hold_out_ratio))
    # check for the required number of positive examples
    if len(positives) <= hold_out_size:
        raise ValueError(
            'Not enough positive examples to estimate p(s=1|y=1,x).'
            ' Need at least {}.'.format(hold_out_size + 1)
        )
    # construct the holdout set from the first positives and remove it from
    # the training data
    hold_out = positives[:hold_out_size]
    X_hold_out = X[hold_out]
    X = np.delete(X, hold_out, 0)
    y = np.delete(y, hold_out)
    # fit the inner estimator
    self.estimator.fit(X, y)
    # c is the mean positive-class score over the held-out positives
    hold_out_predictions = self.estimator.predict_proba(X_hold_out)[:, 1]
    self.c = np.mean(hold_out_predictions)
    self.estimator_fitted = True
    return self
def predict(self, X, threshold=0.5)

Predict labels.


X : array-like of shape = [n_samples, n_features]
The input samples.
threshold : float, default 0.5
The decision threshold over probability to warrant a positive label.


y : array of int of shape = [n_samples]
Predicted labels for the given input samples.
Expand source code Browse git
def predict(self, X, threshold=0.5):
    """Predict labels.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.
    threshold : float, default 0.5
        The decision threshold over probability to warrant a
        positive label.

    Returns
    -------
    y : array of float of shape = [n_samples]
        1.0 where the predicted probability is strictly above
        ``threshold``, -1.0 otherwise.

    Raises
    ------
    NotFittedError
        If fit() has not been called.
    """
    if not self.estimator_fitted:
        raise NotFittedError(
            'The estimator must be fitted before calling predict(...).'
        )
    scores = np.asarray(self.predict_proba(X))
    return np.where(scores > threshold, 1.0, -1.0)
def predict_proba(self, X)

Predict class probabilities for X.


X : array-like of shape = [n_samples, n_features]
The input samples.


p : array of shape = [n_samples]
The estimated probability of the positive class for each input sample: the inner estimator's positive-class probability divided by the estimated constant c. Values are not clipped and may exceed 1.
Expand source code Browse git
def predict_proba(self, X):
    """Predict the probability of the positive class for X.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    p : array of shape = [n_samples]
        The inner estimator's second-column probability divided by ``c``.
        The result is not clipped, so values may exceed 1.

    Raises
    ------
    NotFittedError
        If fit() has not been called.
    """
    if not self.estimator_fitted:
        raise NotFittedError(
            'The estimator must be fitted before calling predict_proba().'
        )
    positive_scores = self.estimator.predict_proba(X)[:, 1]
    return positive_scores / self.c
class WeightedElkanotoPuClassifier (estimator, labeled, unlabeled, hold_out_ratio=0.1)

Positive-unlabeled classifier using the weighted Elkan & Noto method.

See the original Elkan & Noto paper for details on how the labeled and unlabeled quantities are used to weigh training examples and affect the learning process.


estimator : sklearn.BaseEstimator
Any sklearn-compliant estimator object implementing the fit() and predict_proba() methods.
labeled : int
The cardinality to attribute to the labeled training set.
unlabeled : int
The cardinality to attribute to the unlabeled training set.
hold_out_ratio : float, default 0.1

The fraction of the positive training examples to set aside to estimate the probability of an example to be positive.

Expand source code Browse git
class WeightedElkanotoPuClassifier(BaseEstimator, ClassifierMixin):
    """Positive-unlabeled classifier using the weighted Elkan & Noto method.

    See the original Elkan & Noto paper for details on how the `labeled` and
    `unlabeled` quantities are used to weigh training examples and affect the
    learning process.

    Parameters
    ----------
    estimator : sklearn.BaseEstimator
        Any sklearn-compliant estimator object implementing the fit() and
        predict_proba() methods.
    labeled : int
        The cardinality to attribute to the labeled training set.
    unlabeled : int
        The cardinality to attribute to the unlabeled training set.
    hold_out_ratio : float, default 0.1
        The fraction of the positive training examples to set aside to
        estimate the probability of an example to be positive.
    """

    def __init__(self, estimator, labeled, unlabeled, hold_out_ratio=0.1):
        self.estimator = estimator
        # c stays at 1.0 until fit() replaces it with the hold-out estimate
        self.c = 1.0
        self.hold_out_ratio = hold_out_ratio
        self.labeled = labeled
        self.unlabeled = unlabeled
        self.estimator_fitted = False

    def __str__(self):
        return 'Estimator: {}\np(s=1|y=1,x) ~= {}\nFitted: {}'.format(
            self.estimator,
            self.c,
            self.estimator_fitted,
        )

    def fit(self, X, y):
        """Fit the inner estimator and estimate the constant ``c``.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            The training input samples.
        y : array-like, shape = [n_samples]
            The target values. Entries equal to 1 are treated as positives.

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        ValueError
            If there are not more positive examples than the hold-out size.
        """
        # Accept plain lists/tuples: both the ``y == 1.0`` mask and the
        # fancy indexing below need real arrays.
        X = np.asarray(X)
        y = np.asarray(y)
        positives = np.where(y == 1.0)[0]
        hold_out_size = int(np.ceil(len(positives) * self.hold_out_ratio))
        # check for the required number of positive examples
        if len(positives) <= hold_out_size:
            raise ValueError(
                'Not enough positive examples to estimate p(s=1|y=1,x).'
                ' Need at least {}.'.format(hold_out_size + 1)
            )
        # construct the holdout set from the first positives and remove it
        # from the training data
        hold_out = positives[:hold_out_size]
        X_hold_out = X[hold_out]
        X = np.delete(X, hold_out, 0)
        y = np.delete(y, hold_out)
        # fit the inner estimator
        self.estimator.fit(X, y)
        # c is the mean positive-class score over the held-out positives
        hold_out_predictions = self.estimator.predict_proba(X_hold_out)[:, 1]
        self.c = np.mean(hold_out_predictions)
        self.estimator_fitted = True
        return self

    def _estimateEy(self, G):
        """Return the estimate of E[y], i.e. P(y=1).

        ``G`` is the two-column output of the inner estimator's
        predict_proba(). Its second column is clamped in place (exact 1.0
        becomes 0.999) so the odds ``G / (1 - G)`` stay finite.
        """
        n = self.labeled
        m = self.labeled + self.unlabeled
        G = G[:, 1]
        # G is a view on the caller's array, so this clamp is visible to the
        # caller as well; predict_proba() slices the column after this call.
        np.place(G, G == 1.0, 0.999)
        W = (G / (1 - G)) * ((1 - self.c) / self.c)
        return (float(n) + float(W.sum())) / float(m)

    def predict_proba(self, X):
        """Predict the probability of the positive class for X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples]
            ``P(s=1|x) * c * E[y] * m / n`` with ``n = labeled`` and
            ``m = labeled + unlabeled``. E[y] is estimated from the scores
            of this same ``X``, so the output depends on the whole batch.

        Raises
        ------
        NotFittedError
            If fit() has not been called.
        """
        if not self.estimator_fitted:
            raise NotFittedError(
                'The estimator must be fitted before calling predict_proba().'
            )
        n = self.labeled
        m = self.labeled + self.unlabeled
        # self.estimator.predict_proba gives the probability of P(s=1|x)
        # for x belongs to P or U
        probabilistic_predictions = self.estimator.predict_proba(X)
        # must run before the column is sliced: it clamps scores in place
        yEstimate = self._estimateEy(probabilistic_predictions)
        probabilistic_predictions = probabilistic_predictions[:, 1]
        numerator = probabilistic_predictions * (self.c * yEstimate * m)
        return numerator / float(n)

    def predict(self, X, treshold=0.5, threshold=None):
        """Predict labels.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.
        treshold : float, default 0.5
            The decision threshold over probability to warrant a
            positive label. Misspelled name kept for backward compatibility.
        threshold : float, optional
            Correctly spelled alias; when given it takes precedence over
            ``treshold``.

        Returns
        -------
        y : array of float of shape = [n_samples]
            1.0 where the predicted probability is strictly above the
            threshold, -1.0 otherwise.

        Raises
        ------
        NotFittedError
            If fit() has not been called.
        """
        if not self.estimator_fitted:
            raise NotFittedError(
                'The estimator must be fitted before calling predict().'
            )
        if threshold is None:
            threshold = treshold
        scores = np.asarray(self.predict_proba(X))
        return np.where(scores > threshold, 1.0, -1.0)


  • sklearn.base.BaseEstimator
  • sklearn.base.ClassifierMixin


def fit(self, X, y)

Fits the classifier


X : array-like, shape = [n_samples, n_features]
The training input samples.
y : array-like, shape = [n_samples]
The target values. An array of int.


self : object
Returns self.
Expand source code Browse git
def fit(self, X, y):
    """Fit the inner estimator and estimate the constant ``c``.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        The training input samples.
    y : array-like, shape = [n_samples]
        The target values. Entries equal to 1 are treated as positives.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If there are not more positive examples than the hold-out size.
    """
    # Accept plain lists/tuples: both the ``y == 1.0`` mask and the fancy
    # indexing below need real arrays.
    X = np.asarray(X)
    y = np.asarray(y)
    positives = np.where(y == 1.0)[0]
    hold_out_size = int(np.ceil(len(positives) * self.hold_out_ratio))
    # check for the required number of positive examples
    if len(positives) <= hold_out_size:
        raise ValueError(
            'Not enough positive examples to estimate p(s=1|y=1,x).'
            ' Need at least {}.'.format(hold_out_size + 1)
        )
    # construct the holdout set from the first positives and remove it from
    # the training data
    hold_out = positives[:hold_out_size]
    X_hold_out = X[hold_out]
    X = np.delete(X, hold_out, 0)
    y = np.delete(y, hold_out)
    # fit the inner estimator
    self.estimator.fit(X, y)
    # c is the mean positive-class score over the held-out positives
    hold_out_predictions = self.estimator.predict_proba(X_hold_out)[:, 1]
    self.c = np.mean(hold_out_predictions)
    self.estimator_fitted = True
    return self
def predict(self, X, treshold=0.5)

Predict labels.


X : array-like of shape = [n_samples, n_features]
The input samples.
threshold : float, default 0.5
The decision threshold over probability to warrant a positive label.


y : array of int of shape = [n_samples]
Predicted labels for the given input samples.
Expand source code Browse git
def predict(self, X, treshold=0.5, threshold=None):
    """Predict labels.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.
    treshold : float, default 0.5
        The decision threshold over probability to warrant a
        positive label. Misspelled name kept for backward compatibility.
    threshold : float, optional
        Correctly spelled alias; when given it takes precedence over
        ``treshold``.

    Returns
    -------
    y : array of float of shape = [n_samples]
        1.0 where the predicted probability is strictly above the
        threshold, -1.0 otherwise.

    Raises
    ------
    NotFittedError
        If fit() has not been called.
    """
    if not self.estimator_fitted:
        raise NotFittedError(
            'The estimator must be fitted before calling predict().'
        )
    if threshold is None:
        threshold = treshold
    scores = np.asarray(self.predict_proba(X))
    return np.where(scores > threshold, 1.0, -1.0)
def predict_proba(self, X)

Predict class probabilities for X.


X : array-like of shape = [n_samples, n_features]
The input samples.


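p : array of shape = [n_samples]
The estimated probability of the positive class for each input sample: the inner estimator's positive-class probability scaled by c * E[y] * (labeled + unlabeled) / labeled, where E[y] is estimated from the same batch of samples.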
Expand source code Browse git
def predict_proba(self, X):
    """Predict the probability of the positive class for X.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    p : array of shape = [n_samples]
        ``P(s=1|x) * c * E[y] * m / n`` with ``n = labeled`` and
        ``m = labeled + unlabeled``. E[y] comes from ``_estimateEy`` applied
        to the scores of this same ``X``, so the output depends on the batch.

    Raises
    ------
    NotFittedError
        If fit() has not been called.
    """
    if not self.estimator_fitted:
        raise NotFittedError(
            'The estimator must be fitted before calling predict_proba().'
        )
    n = self.labeled
    m = self.labeled + self.unlabeled
    # self.estimator.predict_proba gives the probability of P(s=1|x)
    # for x belongs to P or U
    probabilistic_predictions = self.estimator.predict_proba(X)
    # keep this call ahead of the column slice: _estimateEy receives the
    # full array and may adjust it before the scores are read below
    yEstimate = self._estimateEy(probabilistic_predictions)
    probabilistic_predictions = probabilistic_predictions[:, 1]
    numerator = probabilistic_predictions * (self.c * yEstimate * m)
    return numerator / float(n)