Source code for gbnet.xgbmodule

from typing import Union
import warnings
import numpy as np
import pandas as pd
import torch
import xgboost as xgb
from torch import nn

from gbnet.base import BaseGBModule



[docs]
class XGBModule(BaseGBModule):
    """XGBoost Module that wraps XGBoost boosting into a PyTorch Module.

    This module allows integration of XGBoost gradient boosting with PyTorch neural networks.
    It maintains the boosting model state and handles both training and inference.

    Args:
        batch_size (int): Size of training data
        input_dim (int): Dimension of input features
        output_dim (int): Dimension of output predictions
        params (dict, optional): Parameters passed to LightGBM. Defaults to {}.
        min_hess (float, optional): Minimum hessian value submitted to LightGBM. Defaults to 0.

    Attributes:
        batch_size (int): Size of mini-batches
        input_dim (int): Input feature dimension
        output_dim (int): Output prediction dimension
        params (dict): LightGBM parameters
        bst (lightgbm.Booster): The underlying LightGBM booster
        FX (torch.nn.Parameter): Current predictions tensor
        train_dat (lightgbm.Dataset): Training dataset used for caching
        min_hess (float): Minimum hessian threshold
    """

    def __init__(self, batch_size, input_dim, output_dim, params={}, min_hess=0):
        super(XGBModule, self).__init__()

[docs]
        self.batch_size = batch_size


[docs]
        self.input_dim = input_dim


[docs]
        self.output_dim = output_dim



[docs]
        self.params = params.copy()


        assert (
            "objective" not in self.params
        ), "objective should not be specified in params"
        assert (
            "base_score" not in self.params
        ), "base_score should not be specified in params"

        self.params["objective"] = "reg:squarederror"
        self.params["base_score"] = 0

[docs]
        self.n_completed_boost_rounds = 0


[docs]
        self.min_hess = min_hess


        init_matrix = np.zeros([batch_size, input_dim])

[docs]
        self.bst = xgb.train(
            self.params,
            xgb.DMatrix(init_matrix, label=np.zeros(batch_size * output_dim)),
            num_boost_round=0,
        )

        self.n_completed_boost_rounds = 0

[docs]
        self.dtrain = None


[docs]
        self.training_n = None



[docs]
        self.FX = nn.Parameter(
            torch.tensor(
                np.zeros([batch_size, output_dim]),
                dtype=torch.float,
            )
        )



[docs]
    def _check_training_data(self):
        if self.dtrain.get_weight().shape[0] > 0:
            warnings.warn(
                "Weights will not work properly when defined as part of the input DMatrix. Weights should be defined in the loss."
            )



[docs]
    def _input_checking_setting(
        self, input_data: Union[xgb.DMatrix, pd.DataFrame, np.ndarray]
    ):
        assert isinstance(input_data, (xgb.DMatrix, pd.DataFrame, np.ndarray))
        if self.training:
            if self.dtrain is None:
                if isinstance(input_data, xgb.DMatrix):
                    input_data.set_label(np.zeros(self.batch_size * self.output_dim))
                    self.dtrain = input_data
                    self.training_n = input_data.num_row()
                    self._check_training_data()
                else:
                    self.dtrain = xgb.DMatrix(
                        input_data, label=np.zeros(self.batch_size * self.output_dim)
                    )
                    self.training_n = input_data.shape[0]
            compare_n = (
                input_data.num_row()
                if isinstance(input_data, xgb.DMatrix)
                else input_data.shape[0]
            )
            assert (
                compare_n == self.training_n
            ), "Changing datasets while training is not currently supported. If trying to make predictions, set Module to eval mode via `Module.eval()`"
            return self.dtrain
        return (
            input_data
            if isinstance(input_data, xgb.DMatrix)
            else xgb.DMatrix(input_data)
        )



[docs]
    def forward(
        self,
        input_data: Union[xgb.DMatrix, np.ndarray, pd.DataFrame],
        return_tensor: bool = True,
    ):
        """Forward pass through the XGBoost module.

        Args:
            input_dataset (Union[xgb.DMatrix, np.ndarray, pd.DataFrame]): Input data for prediction.
                Can be a XGBoost DMatrix, numpy array, or pandas DataFrame.
            return_tensor (bool, optional): Whether to return predictions as a PyTorch tensor.
                Defaults to True.

        Returns:
            Union[torch.Tensor, np.ndarray]: Model predictions. Returns a PyTorch tensor if
            return_tensor=True, otherwise returns a numpy array.

        The forward pass handles both train and eval
        - In train mode, maintains state between iterations and updates internal FX tensor
        - In eval mode, generates predictions on new data using the trained model
        """
        input_data = self._input_checking_setting(input_data)
        preds = self.bst.predict(input_data)

        if self.training:
            FX_detach = self.FX.detach()
            FX_detach.copy_(
                torch.tensor(
                    preds.reshape([self.batch_size, self.output_dim]), dtype=torch.float
                )
            )

        if return_tensor:
            if self.training:
                return self.FX
            else:
                return torch.tensor(
                    preds.reshape([-1, self.output_dim]), dtype=torch.float
                )
        return preds



[docs]
    def gb_calc(self):
        self.grad, self.hess = self._get_grad_hess_FX()



[docs]
    def gb_step(self):
        """Performs a gradient boosting step to update the model.

        This method:
        1. Computes gradients and hessians from the current predictions
        3. Updates the internal boosting model

        The gradients are scaled by batch size and hessians are clipped to a minimum value
        to ensure numerical stability.

        Returns:
            None
        """
        if self.grad is None and self.hess is None:
            self.gb_calc()

        self._gb_step_grad_hess(self.grad, self.hess)
        self.grad = None
        self.hess = None



[docs]
    def _gb_step_grad_hess(self, grad, hess):
        obj = XGBObj(grad, hess)
        g, h = obj(np.zeros([self.batch_size, self.output_dim]), None)

        if xgb.__version__ <= "2.0.3":
            self.bst.boost(
                self.dtrain,
                g,
                h,
            )
        else:
            self.bst.boost(
                dtrain=self.dtrain,
                iteration=self.n_completed_boost_rounds + 1,
                grad=g,
                hess=h,
            )
        self.n_completed_boost_rounds = self.n_completed_boost_rounds + 1



[docs]
    def get_extra_state(self):
        return self.bst.save_raw()



[docs]
    def set_extra_state(self, state):
        self.bst = xgb.Booster(model_file=state)





[docs]
class XGBObj:
    """Helper class for use with XGBoost as a backend for XGBModule"""

    def __init__(self, grad, hess):

[docs]
        self.grad = grad


[docs]
        self.hess = hess



[docs]
    def __call__(self, preds, dtrain):
        if len(preds.shape) == 2:
            M = preds.shape[0]
            N = preds.shape[1]
        else:
            M = preds.shape[0]
            N = 1

        if xgb.__version__ >= "2.1.0":
            g = self.grad.detach().numpy().reshape([M, N])
            h = self.hess.detach().numpy().reshape([M, N])
        else:
            g = self.grad.detach().numpy().reshape([M * N, 1])
            h = self.hess.detach().numpy().reshape([M * N, 1])

        return g, h