Source code for gbnet.lgbmodule

from typing import Union
import lightgbm as lgb
import numpy as np
import pandas as pd
import torch
from torch import nn

from gbnet.base import BaseGBModule


class LGBModule(BaseGBModule):
    """LightGBM Module that wraps LightGBM boosting into a PyTorch Module.

    This module allows integration of LightGBM gradient boosting with PyTorch
    neural networks. It maintains the boosting model state and handles both
    training and inference.

    Args:
        batch_size (int): Size of training data
        input_dim (int): Dimension of input features
        output_dim (int): Dimension of output predictions
        params (dict, optional): Parameters passed to LightGBM. Must not
            contain an "objective" key. Defaults to None, which is treated
            as an empty dict.
        min_hess (float, optional): Minimum hessian value submitted to
            LightGBM. Defaults to 0.

    Attributes:
        batch_size (int): Size of mini-batches
        input_dim (int): Input feature dimension
        output_dim (int): Output prediction dimension
        params (dict): LightGBM parameters (a shallow copy of the argument)
        bst (lightgbm.Booster): The underlying LightGBM booster, or None
            until the first call to ``gb_step``
        FX (torch.nn.Parameter): Current predictions tensor of shape
            ``[batch_size, output_dim]``
        train_dat (lightgbm.Dataset): Training dataset used for caching
        min_hess (float): Minimum hessian threshold. It is only stored by
            this class; presumably it is consumed by the base class when
            gradients/hessians are computed -- confirm against BaseGBModule.
        grad: Gradients cached by ``gb_calc``; reset to None by ``gb_step``
        hess: Hessians cached by ``gb_calc``; reset to None by ``gb_step``
    """

    def __init__(self, batch_size, input_dim, output_dim, params=None, min_hess=0):
        super(LGBModule, self).__init__()
        # None replaces the former mutable ``{}`` default; both mean "no params".
        if params is None:
            params = {}
        self.batch_size = batch_size
        self.input_dim = input_dim
        self.output_dim = output_dim
        # The objective is always supplied by gb_step, so a user-provided one
        # would be silently overwritten; reject it up front instead.
        assert "objective" not in params, "objective should not be specified in params"
        self.params = params.copy()
        self.bst = None
        # Registered as a Parameter so autograd can attach a gradient to the
        # current boosted predictions.
        self.FX = nn.Parameter(
            torch.tensor(
                np.zeros([batch_size, output_dim]),
                dtype=torch.float,
            )
        )
        self.train_dat = None
        self.min_hess = min_hess
        self.grad = None
        self.hess = None

    def _set_train_dat(self, input_dataset: lgb.Dataset):
        """Store ``input_dataset`` as the cached training dataset.

        The dataset is mutated in place: its params are forced to
        ``verbose=-1`` and ``free_raw_data`` is switched off before it is
        assigned to ``self.train_dat``.

        Args:
            input_dataset (lgb.Dataset): Dataset to train on.
        """
        if input_dataset.params is None:
            input_dataset.params = {"verbose": -1}
        else:
            input_dataset.params.update({"verbose": -1})
        input_dataset.free_raw_data = False
        self.train_dat = input_dataset

    def _input_checking_setting(
        self, input_dataset: Union[lgb.Dataset, np.ndarray, pd.DataFrame]
    ):
        """Validate the input and resolve what ``forward`` should operate on.

        In training mode the first input seen is cached as the training
        dataset (wrapped in an ``lgb.Dataset`` if necessary) and the cached
        dataset is always returned. In eval mode the raw data is returned:
        either the input itself, or the data extracted from an
        ``lgb.Dataset``.

        Args:
            input_dataset: A LightGBM Dataset, numpy array, or pandas
                DataFrame.

        Returns:
            The cached ``lgb.Dataset`` in training mode, otherwise the raw
            input data.

        Raises:
            AssertionError: If the input has an unsupported type, or if the
                input appears to have changed after boosting has started
                while in training mode.
        """
        assert isinstance(input_dataset, (lgb.Dataset, np.ndarray, pd.DataFrame))

        if self.training:
            if self.train_dat is None:
                self._set_train_dat(
                    input_dataset
                    if isinstance(input_dataset, lgb.Dataset)
                    else lgb.Dataset(input_dataset)
                )

            # Before the first boosting round there is nothing to check
            # the input against.
            if self.bst is None:
                return self.train_dat

            if isinstance(input_dataset, lgb.Dataset):
                assert (
                    input_dataset._handle is not None
                ), "Changing datasets during training is not supported. If trying to do prediction, call LGBModule.eval() first."
            else:
                # For array-like input the row count is the only available
                # sanity check.
                assert (
                    self.batch_size == input_dataset.shape[0]
                ), "Changing datasets during training is not supported. If trying to do prediction, call LGBModule.eval() first."

            return self.train_dat

        if isinstance(input_dataset, lgb.Dataset):
            # Clunky way to get original data
            input_dataset.free_raw_data = False
            input_dataset.construct()
            return input_dataset.get_data()

        return input_dataset

    def forward(
        self,
        input_dataset: Union[lgb.Dataset, np.ndarray, pd.DataFrame],
        return_tensor=True,
    ):
        """Forward pass through the LightGBM module.

        Args:
            input_dataset (Union[lgb.Dataset, np.ndarray, pd.DataFrame]):
                Input data for prediction. Can be a LightGBM Dataset, numpy
                array, or pandas DataFrame.
            return_tensor (bool, optional): Whether to return predictions as
                a PyTorch tensor. Defaults to True.

        Returns:
            Union[torch.Tensor, np.ndarray]: Model predictions. Returns a
            PyTorch tensor if return_tensor=True, otherwise returns a numpy
            array. Before any boosting round has run, predictions are all
            zeros.

        The forward pass handles both train and eval:
            - In train mode, maintains state between iterations and updates
              the internal FX tensor (which is what gets returned when
              return_tensor=True)
            - In eval mode, generates predictions on new data using the
              trained model
        """
        input_dataset = self._input_checking_setting(input_dataset)

        # TODO figure out how actual batch training works here
        if self.training:
            if self.bst:
                # Predictions the booster keeps for its own training data.
                preds = self.bst._Booster__inner_predict(0).copy()
            else:
                preds = np.zeros([self.batch_size, self.output_dim])
        else:
            if self.bst:
                preds = self.bst.predict(input_dataset).copy()
            else:
                # No dtype argument: NumPy cannot interpret a torch dtype
                # (passing torch.float here raised TypeError). The conversion
                # to torch.float happens below when a tensor is requested.
                preds = np.zeros([input_dataset.shape[0], self.output_dim])

        if self.training:
            # Write through a detached view so the copy itself is not
            # recorded by autograd while still updating FX's storage.
            FX_detach = self.FX.detach()
            FX_detach.copy_(
                torch.tensor(
                    preds.reshape([self.batch_size, self.output_dim]),
                    dtype=torch.float,
                )
            )

        if return_tensor:
            if self.training:
                return self.FX
            return torch.tensor(
                preds.reshape([-1, self.output_dim]), dtype=torch.float
            )
        return preds

    def gb_calc(self):
        """Compute and cache gradients and hessians for the next boosting step.

        Delegates to ``_get_grad_hess_FX``, which is not defined in this
        class (presumably inherited from BaseGBModule -- confirm), and stores
        the result in ``self.grad`` and ``self.hess``.
        """
        self.grad, self.hess = self._get_grad_hess_FX()

    def gb_step(self):
        """Performs a gradient boosting step to update the model.

        This method:
            1. Computes gradients and hessians via ``gb_calc`` unless both
               are already cached
            2. Creates a LightGBM objective using the cached
               gradients/hessians
            3. Updates the internal boosting model by either:
                - Updating the existing model if one exists
                - Training a new model for 1 boosting round if no model
                  exists
            4. Clears the cached gradients/hessians

        Any scaling or clipping of gradients/hessians happens wherever
        ``_get_grad_hess_FX`` is implemented, not in this method.

        Returns:
            None
        """
        # Recompute if either half is missing: a lone None would otherwise
        # crash inside LightGBObj when it calls ``.detach()`` on it.
        if self.grad is None or self.hess is None:
            self.gb_calc()

        obj = LightGBObj(self.grad, self.hess)
        input_params = self.params.copy()
        input_params.update(
            {
                "objective": obj,
                "num_class": self.output_dim,
                "verbose": -1,
                "verbosity": -1,
            }
        )

        if self.bst is not None:
            self.bst.update(train_set=self.train_dat, fobj=obj)
        else:
            self.bst = lgb.train(
                params=input_params,
                train_set=self.train_dat,
                num_boost_round=1,
                keep_training_booster=True,
            )

        # Cached values belong to the predictions before this step.
        self.grad = None
        self.hess = None

    def get_extra_state(self):
        """Return the booster serialized as a model string, or None if untrained."""
        return self.bst.model_to_string() if self.bst else None

    def set_extra_state(self, state):
        """Restore the booster from a model string produced by ``get_extra_state``.

        Args:
            state: Model string, or None to clear the booster.

        NOTE(review): only the booster is restored; ``train_dat`` is left
        untouched, so whether training can resume on a restored module
        should be confirmed.
        """
        if state is not None:
            self.bst = lgb.Booster(model_str=state)
        else:
            self.bst = None
class LightGBObj:
    """Helper class for use with LightGBM as a backend for LGBModule.

    Wraps precomputed gradients and hessians in a callable with the
    ``(y_true, y_pred)`` signature of a custom objective. The arguments of
    the call are ignored; the stored arrays are returned as-is.

    Args:
        grad (torch.Tensor): Gradients, one row per sample.
        hess (torch.Tensor): Hessians, same shape as ``grad``.

    Attributes:
        grad (numpy.ndarray): Gradients converted to a numpy array.
        hess (numpy.ndarray): Hessians converted to a numpy array.
    """

    def __init__(self, grad, hess):
        # detach() drops autograd tracking; cpu() is a no-op for CPU tensors
        # and makes the numpy conversion work for tensors on other devices.
        self.grad = grad.detach().cpu().numpy()
        self.hess = hess.detach().cpu().numpy()

    def __call__(self, y_true, y_pred):
        """Return the stored ``(grad, hess)`` pair.

        Arrays with more than one output column are returned unchanged;
        single-column and 1-D arrays are returned flattened.

        Args:
            y_true: Ignored.
            y_pred: Ignored.

        Returns:
            tuple[numpy.ndarray, numpy.ndarray]: Gradients and hessians.
        """
        # The ndim guard keeps 1-D gradients from raising IndexError on shape[1].
        if self.grad.ndim > 1 and self.grad.shape[1] > 1:
            return self.grad, self.hess
        return self.grad.flatten(), self.hess.flatten()