Source code for gbnet.models.ordinal_regression

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
import torch
from torch import nn



[docs]
def loadModule(module):
    """Return the gradient boosting module class named by ``module``.

    The matching ``gbnet`` submodule is imported inside the selected branch,
    so it is only loaded when that backend is actually requested.

    Parameters
    ----------
    module : str
        Either ``"XGBModule"`` or ``"LGBModule"``.

    Returns
    -------
    type
        ``gbnet.xgbmodule.XGBModule`` or ``gbnet.lgbmodule.LGBModule``.

    Raises
    ------
    AssertionError
        If ``module`` is not one of the two supported names (the check is an
        ``assert``, so it is skipped when Python runs with ``-O``).
    """
    supported = {"XGBModule", "LGBModule"}
    assert module in supported

    if module == "LGBModule":
        from gbnet import lgbmodule

        chosen = lgbmodule.LGBModule
    elif module == "XGBModule":
        from gbnet import xgbmodule

        chosen = xgbmodule.XGBModule
    else:
        # Only reachable when assertions are disabled.
        chosen = None
    return chosen




[docs]
class GBOrd(BaseEstimator, ClassifierMixin):
    """Gradient Boosting Ordinal Regression model.

    This model combines gradient boosting with ordinal regression to predict ordered
    categorical outcomes. It uses either XGBoost or LightGBM as the underlying boosting
    engine wrapped in a PyTorch module.

    Parameters
    ----------
    num_classes : int
        Number of ordinal classes to predict
    nrounds : int, optional
        Number of boosting rounds. Defaults to 500 for XGBModule and 1000 for LGBModule.
    params : dict, optional
        Additional parameters passed to the gradient boosting model.
    module_type : str, optional
        Type of gradient boosting module to use, either "XGBModule" or "LGBModule".
        Defaults to "LGBModule".
    min_hess : float, optional
        Minimum hessian value for numerical stability. Defaults to 0.0.

    Attributes
    ----------
    model_ : XGBModule or LGBModule
        Trained gradient boosting module. ``None`` until ``fit`` is called.
    losses_ : list
        Loss values recorded at each training iteration of the most recent fit.
    min_targets : scalar
        Minimum value in the training targets. Subtracted from labels before
        training/scoring and added back when predicting class labels.
        Only set by ``fit``.

    Methods
    -------
    fit(X, y)
        Trains the model using input features X and ordinal targets y.
    score(X, y)
        Returns the mean ordinal negative log likelihood on (X, y).
    predict_proba(X)
        Returns per-class probabilities for input features X.
    predict(X, return_logits=True)
        Returns the raw boosted score by default; with ``return_logits=False``
        returns the most probable class label.

    Notes
    -----
    The model uses an ordinal logistic loss function to handle ordered categorical outcomes.
    The gradient boosting model learns a single score which is transformed into class
    probabilities via learned thresholds.

    Shifted labels are used directly as column indices into a probability
    matrix with ``num_classes`` columns, so the training labels must be
    ``num_classes`` consecutive integers (any starting value).
    """

    def __init__(
        self,
        num_classes,
        nrounds=None,
        params=None,
        module_type="LGBModule",
        min_hess=0.0,
    ):
        if params is None:
            params = {}
        if nrounds is None:
            # Backend-specific default number of boosting rounds.
            if module_type == "XGBModule":
                nrounds = 500
            if module_type == "LGBModule":
                nrounds = 1000

        self.nrounds = nrounds
        self.params = params
        self.model_ = None
        self.losses_ = []
        self.module_type = module_type
        # Class (not instance) of the boosting module; instantiated in fit().
        self.Module = loadModule(module_type)
        # Holds the learnable class thresholds.
        # NOTE(review): created once here and not re-created in fit(), so a
        # second fit() continues from the previously learned thresholds.
        self.loss_fn = OrdinalLogisticLoss(num_classes=num_classes)
        self.num_classes = num_classes
        self.min_hess = min_hess

    def _require_fitted(self):
        """Raise ``NotFittedError`` unless ``fit`` has produced a model.

        ``check_is_fitted(self, "model_")`` only tests that the attribute
        exists, and ``__init__`` always creates it (as ``None``), so the
        ``None`` case has to be checked explicitly.
        """
        check_is_fitted(self, "model_")
        if self.model_ is None:
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                f"This {type(self).__name__} instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this estimator."
            )

    def fit(self, X, y=None):
        """
        Train the boosting module and the class thresholds jointly.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features. Must expose ``.shape`` and be accepted by the
            selected boosting module.
        y : array-like of shape (n_samples,)
            Ordinal targets (pandas Series/DataFrame column, NumPy array or
            list). Must contain exactly ``num_classes`` distinct values.

        Returns
        -------
        self

        Raises
        ------
        TypeError
            If ``y`` is not provided.
        AssertionError
            If the number of distinct labels differs from ``num_classes``.
        """
        if y is None:
            raise TypeError("fit() requires target values y")

        labels = np.asarray(y).ravel()
        self.min_targets = labels.min()
        assert len(np.unique(labels)) == self.num_classes, (
            "number of distinct labels in y must equal num_classes"
        )
        # Shift labels so the smallest class becomes index 0.
        targets = torch.as_tensor(labels - self.min_targets).long()

        # Start a fresh loss history; the model below is rebuilt from scratch too.
        self.losses_ = []
        # Positional arguments are (n_samples, n_features, 1); the trailing 1
        # is presumably the output dimension (single score) - confirm against
        # the module's signature.
        self.model_ = self.Module(
            X.shape[0], X.shape[1], 1, params=self.params, min_hess=self.min_hess
        )
        self.model_.train()

        # One optimizer over both the module's torch parameters and the
        # loss thresholds.
        optimizer = torch.optim.Adam(
            list(self.model_.parameters()) + list(self.loss_fn.parameters()), lr=0.01
        )

        for _ in range(self.nrounds):
            optimizer.zero_grad()
            preds = self.model_(X).flatten()
            loss = self.loss_fn(preds, targets)
            # create_graph=True keeps the gradient graph differentiable;
            # presumably gb_step() needs it for second-order (hessian)
            # information - confirm against the module implementation.
            loss.backward(create_graph=True)
            self.losses_.append(loss.detach().item())
            self.model_.gb_step()
            optimizer.step()

        self.model_.eval()
        return self

    def score(self, X, y):
        """
        Return the negative log likelihood score for input X and targets y.

        Labels are shifted by the minimum label seen during ``fit`` so that
        they map to the same class indices used in training, even when the
        scoring data does not contain the lowest class.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        float
            Negative log likelihood score. Lower values indicate better fit.
            Note this overrides ``ClassifierMixin.score`` (accuracy).

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``.
        """
        self._require_fitted()
        labels = np.asarray(y).ravel()
        targets = torch.as_tensor(labels - self.min_targets).long()
        logits = self.model_(X).flatten()
        neg_log_likelihood = self.loss_fn(logits, targets)
        return neg_log_likelihood.detach().item()

    def predict_proba(self, X):
        """
        Predict class probabilities for input X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features.

        Returns
        -------
        array-like of shape (n_samples, n_classes)
            Predicted class probabilities.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``.
        """
        self._require_fitted()
        logits = self.model_(X).flatten()
        probs = self.loss_fn.get_pred_probs(logits).detach().numpy()
        return probs

    def predict(self, X, return_logits=True):
        """
        Predict the raw score or the class label for input X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features.
        return_logits : bool, optional
            If True (default), return the flattened raw output of the
            boosting module. If False, return the most probable class,
            expressed in the original label scale (class index plus the
            minimum training label).

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Raw scores or predicted class labels.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``.
        """
        self._require_fitted()
        if return_logits:
            preds = self.model_(X).detach().numpy()
            return preds.flatten()
        else:
            preds = self.predict_proba(X).argmax(axis=1) + self.min_targets
            return preds




[docs]
class OrdinalLogisticLoss(nn.Module):
    """Ordinal logistic negative log-likelihood with learnable thresholds.

    A single score per sample is compared against ``num_classes - 1``
    learnable breakpoints. The cumulative probability of class ``<= k`` is
    ``sigmoid(breakpoints[k] - score)``; per-class probabilities are the
    differences between consecutive cumulative probabilities.

    Parameters
    ----------
    num_classes : int
        Number of ordered classes. Must be at least 2.

    Attributes
    ----------
    num_classes : int
        Number of ordered classes.
    breakpoints : torch.nn.Parameter of shape (num_classes - 1,)
        Learnable thresholds, initialised to unit-spaced values centred on
        zero (e.g. ``[-0.5, 0.5]`` for three classes).

    Raises
    ------
    ValueError
        If ``num_classes`` is smaller than 2.
    """

    def __init__(self, num_classes):
        super().__init__()
        if num_classes < 2:
            # With fewer than two classes there are no thresholds and the
            # probability matrix would be empty.
            raise ValueError(
                f"num_classes must be at least 2, got {num_classes}"
            )

        self.num_classes = num_classes

        self.breakpoints = nn.Parameter(
            torch.arange(num_classes - 1, dtype=torch.float32)
            - (num_classes - 2.0) / 2.0
        )

    def _compute_probabilities(self, logits):
        """
        Compute class probabilities.

        Parameters
        ----------
        logits : torch.Tensor of shape (n,) or (n, 1)
            One score per sample.

        Returns
        -------
        torch.Tensor of shape (n, num_classes)
            Per-class probabilities, each clamped to be at least ``1e-8``.
        """
        if logits.dim() == 2:
            logits = logits.squeeze(1)

        # Cumulative probabilities P(class <= k), shape (n, num_classes - 1).
        cum_probs = torch.sigmoid(self.breakpoints.unsqueeze(0) - logits.unsqueeze(1))

        # Pad with 0 on the left and 1 on the right, then difference to get
        # per-class probabilities. The clamp keeps the later log() finite and
        # also covers non-increasing breakpoints (nothing here constrains
        # their order), which would otherwise give negative differences.
        eps = 1e-8
        probs = torch.diff(
            cum_probs,
            prepend=torch.zeros_like(cum_probs[:, :1]),
            append=torch.ones_like(cum_probs[:, :1]),
        ).clamp(min=eps)

        return probs

    def forward(self, logits, targets):
        """
        Compute the mean negative log-likelihood of the target classes.

        Parameters
        ----------
        logits : torch.Tensor of shape (n,) or (n, 1)
            One score per sample.
        targets : torch.Tensor of integer dtype
            Zero-based class indices in ``[0, num_classes - 1]``; flattened
            before use.

        Returns
        -------
        torch.Tensor
            Scalar loss.
        """
        targets = targets.flatten()
        probs = self._compute_probabilities(logits)

        # Pick each sample's probability for its target class directly
        # (same result as multiplying by a one-hot mask and summing).
        target_probs = probs[torch.arange(probs.size(0)), targets]
        loss = -torch.log(target_probs).mean()

        return loss

    def get_pred_probs(self, logits):
        """
        Return per-class probabilities for the given scores.

        Parameters
        ----------
        logits : torch.Tensor of shape (n,) or (n, 1)
            One score per sample.

        Returns
        -------
        torch.Tensor of shape (n, num_classes)
            Class probabilities (not the arg-max class).
        """
        return self._compute_probabilities(logits)