Source code for gbnet.models.ordinal_regression

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
import torch
from torch import nn


def loadModule(module):
    """Return the gradient-boosting module class selected by name.

    The backend package is imported lazily inside the matching branch, so
    only the requested backend is imported.

    Parameters
    ----------
    module : str
        Either ``"XGBModule"`` or ``"LGBModule"``.

    Returns
    -------
    type
        ``gbnet.xgbmodule.XGBModule`` or ``gbnet.lgbmodule.LGBModule``.

    Raises
    ------
    AssertionError
        If ``module`` is not one of the two supported names.
    """
    supported = {"XGBModule", "LGBModule"}
    if module not in supported:
        # Raised explicitly (instead of via an ``assert`` statement) so the
        # check is not stripped under ``python -O``; otherwise an unknown name
        # would silently return None. AssertionError is kept so existing
        # callers see the same exception type.
        raise AssertionError(
            f"module must be one of {sorted(supported)}, got {module!r}"
        )

    if module == "XGBModule":
        from gbnet import xgbmodule

        return xgbmodule.XGBModule

    from gbnet import lgbmodule

    return lgbmodule.LGBModule
class GBOrd(BaseEstimator, ClassifierMixin):
    """Gradient Boosting Ordinal Regression model.

    This model combines gradient boosting with ordinal regression to predict
    ordered categorical outcomes. It uses either XGBoost or LightGBM as the
    underlying boosting engine wrapped in a PyTorch module.

    Parameters
    ----------
    num_classes : int
        Number of ordinal classes to predict.
    nrounds : int, optional
        Number of boosting rounds. Defaults to 500 for XGBModule and 1000 for
        LGBModule.
    params : dict, optional
        Additional parameters passed to the gradient boosting model.
    module_type : str, optional
        Type of gradient boosting module to use, either "XGBModule" or
        "LGBModule". Defaults to "LGBModule".
    min_hess : float, optional
        Minimum hessian value for numerical stability. Defaults to 0.0.

    Attributes
    ----------
    model_ : XGBModule or LGBModule
        Trained gradient boosting module. ``None`` until ``fit`` is called.
    losses_ : list
        Loss values recorded at each training iteration of the most recent
        ``fit`` call.
    min_targets : int
        Minimum value in training targets, used for label normalization.
        Set by ``fit``.

    Notes
    -----
    The model uses an ordinal logistic loss function to handle ordered
    categorical outcomes. The gradient boosting model learns a single score
    which is transformed into class probabilities via learned thresholds.
    """

    def __init__(
        self,
        num_classes,
        nrounds=None,
        params=None,
        module_type="LGBModule",
        min_hess=0.0,
    ):
        if params is None:
            params = {}
        if nrounds is None:
            if module_type == "XGBModule":
                nrounds = 500
            if module_type == "LGBModule":
                nrounds = 1000
        self.nrounds = nrounds
        self.params = params
        self.model_ = None
        self.losses_ = []
        self.module_type = module_type
        self.Module = loadModule(module_type)
        self.loss_fn = OrdinalLogisticLoss(num_classes=num_classes)
        self.num_classes = num_classes
        self.min_hess = min_hess

    def _check_fitted(self):
        """Raise ``NotFittedError`` unless ``fit`` has produced a model.

        ``check_is_fitted`` only tests that the attribute exists, and
        ``__init__`` pre-sets ``model_ = None``, so the ``None`` case has to
        be checked explicitly.
        """
        check_is_fitted(self, "model_")
        if self.model_ is None:
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                f"This {type(self).__name__} instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this "
                "estimator."
            )

    def _normalize_targets(self, y):
        """Convert raw labels to zero-based class indices.

        Labels are shifted by the minimum label seen during ``fit`` so that
        training and evaluation always share the same label-to-index mapping.
        """
        targets = torch.Tensor(np.asarray(y)).flatten().long()
        return targets - self.min_targets

    def fit(self, X, y=None):
        """Train the booster and the ordinal thresholds jointly.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features. Must expose ``.shape``; it is passed unchanged to
            the underlying gradient boosting module.
        y : array-like of shape (n_samples,)
            Ordinal integer targets (pandas object, NumPy array or sequence).
            Must contain exactly ``num_classes`` distinct values.

        Returns
        -------
        GBOrd
            The fitted estimator (``self``).

        Raises
        ------
        TypeError
            If ``y`` is None.
        AssertionError
            If the number of distinct target values differs from
            ``num_classes``.
        """
        if y is None:
            raise TypeError("fit() requires target values y; got None")

        labels = np.asarray(y)
        self.min_targets = labels.min()
        targets = self._normalize_targets(labels)
        assert len(np.unique(labels)) == self.num_classes, (
            f"expected {self.num_classes} distinct target values, "
            f"found {len(np.unique(labels))}"
        )

        # The booster is rebuilt on every fit, so the loss history must start
        # fresh too; otherwise a refit appends to the previous run's losses.
        # NOTE(review): self.loss_fn (the learned thresholds) is still carried
        # over between fits - confirm whether that is intended.
        self.losses_ = []

        self.model_ = self.Module(
            X.shape[0], X.shape[1], 1, params=self.params, min_hess=self.min_hess
        )
        self.model_.train()
        optimizer = torch.optim.Adam(
            list(self.model_.parameters()) + list(self.loss_fn.parameters()),
            lr=0.01,
        )
        for _ in range(self.nrounds):
            optimizer.zero_grad()
            preds = self.model_(X).flatten()
            loss = self.loss_fn(preds, targets)
            # create_graph=True keeps the graph of the backward pass;
            # presumably gb_step needs it for second-order information -
            # confirm against the gbnet module implementation.
            loss.backward(create_graph=True)
            self.losses_.append(loss.detach().item())
            self.model_.gb_step()
            optimizer.step()
        self.model_.eval()
        return self

    def score(self, X, y):
        """Return the negative log likelihood score for input X and targets y.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features.
        y : array-like of shape (n_samples,)
            Target values, on the same label scale as the training targets.

        Returns
        -------
        float
            Negative log likelihood score. Lower values indicate better fit.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If the estimator has not been fitted.
        """
        self._check_fitted()
        # Shift by the training minimum, not the minimum of this batch: an
        # evaluation set that lacks the lowest class must still map to the
        # same class indices the thresholds were trained on.
        targets = self._normalize_targets(y)
        logits = self.model_(X).flatten()
        neg_log_likelihood = self.loss_fn(logits, targets)
        return neg_log_likelihood.detach().item()

    def predict_proba(self, X):
        """Predict class probabilities for input X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features.

        Returns
        -------
        numpy.ndarray of shape (n_samples, n_classes)
            Predicted class probabilities.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If the estimator has not been fitted.
        """
        self._check_fitted()
        logits = self.model_(X).flatten()
        probs = self.loss_fn.get_pred_probs(logits).detach().numpy()
        return probs

    def predict(self, X, return_logits=True):
        """Predict the latent score or the ordinal class label for input X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features.
        return_logits : bool, optional
            If True (default), return the raw score produced by the booster.
            If False, return the most probable class, shifted back to the
            original label scale by ``min_targets``.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Raw scores or predicted class labels.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If the estimator has not been fitted.
        """
        self._check_fitted()
        if return_logits:
            preds = self.model_(X).detach().numpy()
            return preds.flatten()
        return self.predict_proba(X).argmax(axis=1) + self.min_targets
class OrdinalLogisticLoss(nn.Module):
    """Negative log-likelihood loss for a cumulative-logit ordinal model.

    A single score per sample is compared against ``num_classes - 1``
    learnable breakpoints. The sigmoid of each (breakpoint - score) gives a
    cumulative probability, and successive differences give the per-class
    probabilities.

    Parameters
    ----------
    num_classes : int
        Number of ordered classes.

    Attributes
    ----------
    breakpoints : torch.nn.Parameter
        Learnable thresholds of shape ``(num_classes - 1,)``, initialised
        with unit spacing and centred on zero (e.g. ``[-0.5, 0.5]`` for three
        classes).
    """

    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes
        centre = (num_classes - 2.0) / 2.0
        initial_cuts = torch.arange(num_classes - 1, dtype=torch.float32) - centre
        self.breakpoints = nn.Parameter(initial_cuts)

    def _compute_probabilities(self, logits):
        """Return per-class probabilities for each score in ``logits``.

        A 2-D input has dimension 1 squeezed before use. Every probability
        is clamped from below at 1e-8 so a later logarithm stays finite.
        """
        scores = logits.squeeze(1) if logits.dim() == 2 else logits

        # One column per breakpoint: sigmoid(breakpoint - score).
        cumulative = torch.sigmoid(
            self.breakpoints.unsqueeze(0) - scores.unsqueeze(1)
        )

        # Pad with 0 on the left and 1 on the right so that the successive
        # differences yield one probability per class.
        edge = cumulative[:, :1]
        class_probs = torch.diff(
            cumulative,
            prepend=torch.zeros_like(edge),
            append=torch.ones_like(edge),
        )
        return class_probs.clamp(min=1e-8)

    def forward(self, logits, targets):
        """Return the mean negative log-probability of the target classes.

        ``targets`` holds integer class indices; it is flattened and used to
        pick one probability per row directly, without building a one-hot
        matrix.
        """
        flat_targets = targets.flatten()
        class_probs = self._compute_probabilities(logits)
        row_index = torch.arange(class_probs.size(0))
        picked = class_probs[row_index, flat_targets]
        return -torch.log(picked).mean()

    def get_pred_probs(self, logits):
        """Return the predicted probability of every class for each score."""
        return self._compute_probabilities(logits)