Source code for gbnet.gblinear

from typing import Union

import numpy as np
import pandas as pd

from scipy.linalg import cho_solve, cho_factor
import torch
import torch.nn as nn

from gbnet.base import BaseGBModule


class GBLinear(BaseGBModule):
    """A linear gradient boosting module that uses gradient boosting for updates.

    This module wraps an ``nn.Linear`` layer whose parameters are not updated
    by an optimizer but by a ridge-regression step fitted to the per-sample
    ratio of gradient to hessian of the layer's output. It caches the most
    recent training input and output between ``forward`` and ``gb_step``.

    Parameters
    ----------
    input_dim : int
        Input feature dimension.
    output_dim : int
        Output prediction dimension.
    bias : bool, optional
        Whether to include a bias term. Defaults to True.
    lr : float, optional
        Learning rate applied to each parameter update. Defaults to 0.5.
    min_hess : float, optional
        Minimum hessian threshold. Defaults to 0.0. Only stored by this
        class; presumably consumed by the base-class hessian computation
        (TODO confirm against ``BaseGBModule``).
    lambd : float, optional
        L2 regularization parameter passed to the ridge solve.
        Defaults to 0.01.

    Attributes
    ----------
    linear : nn.Linear
        The underlying linear layer.
    FX : torch.Tensor
        Output of the most recent ``forward`` call.
    input : numpy.ndarray
        Input of the most recent ``forward`` call made in training mode.
    g : torch.Tensor
        Gradient cache, filled by ``gb_calc`` and cleared by ``gb_step``.
    h : torch.Tensor
        Hessian cache, filled by ``gb_calc`` and cleared by ``gb_step``.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        bias: bool = True,
        lr: float = 0.5,
        min_hess: float = 0.0,
        lambd: float = 0.01,
    ) -> None:
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.min_hess = min_hess
        self.bias = bias
        self.lr = lr
        self.lambd = lambd
        self.linear = nn.Linear(self.input_dim, self.output_dim, bias=self.bias)
        # Per-iteration caches; populated by forward() / gb_calc().
        self.FX = None
        self.input = None
        self.g = None
        self.h = None

    def _input_checking_setting(
        self, x: Union[torch.Tensor, np.ndarray, pd.DataFrame]
    ) -> torch.Tensor:
        """Convert ``x`` to a tensor and, in training mode, cache it.

        Parameters
        ----------
        x : torch.Tensor, numpy.ndarray or pandas.DataFrame
            Input features.

        Returns
        -------
        torch.Tensor
            ``x`` itself if it was already a tensor, otherwise a tensor
            built from its values.
        """
        assert isinstance(x, (torch.Tensor, np.ndarray, pd.DataFrame))
        if isinstance(x, np.ndarray):
            x = torch.Tensor(x)
        if isinstance(x, pd.DataFrame):
            x = torch.Tensor(np.array(x))
        if self.training:
            # gb_step builds its design matrix with NumPy, so keep the
            # training input around as an ndarray.
            self.input = x.detach().numpy()
        # TODO add input checks
        return x

    def forward(
        self, x: Union[torch.Tensor, np.ndarray, pd.DataFrame]
    ) -> torch.Tensor:
        """Apply the linear layer to ``x`` and cache the result in ``FX``.

        In training mode the output retains its gradient so that
        ``gb_calc`` can read ``FX.grad`` after ``backward`` has been called.
        """
        x = self._input_checking_setting(x)
        self.FX = self.linear(x)
        if self.training:
            # FX is a non-leaf tensor; without retain_grad() its .grad
            # would stay None after backward().
            self.FX.retain_grad()
        return self.FX

    def gb_calc(self):
        """Calculate gradients and hessians and store them on the object.

        Raises
        ------
        RuntimeError
            If no forward output is cached or it has no gradient yet.
        """
        if self.FX is None or self.FX.grad is None:
            raise RuntimeError("Backward must be called before gb_step.")
        # _get_grad_hess_FX is not defined in this class; it is expected to
        # come from BaseGBModule.
        self.g, self.h = self._get_grad_hess_FX()

    def gb_step(self):
        """Use the stored gradients and hessians to update the weights.

        Fits a ridge regression of ``g / h`` on the cached training input
        (with a leading column of ones when ``bias`` is enabled), subtracts
        ``lr`` times the fitted coefficients from the linear layer's
        parameters, and clears the gradient/hessian caches.
        """
        # Recompute if either cache is missing; both are needed below.
        if self.g is None or self.h is None:
            self.gb_calc()

        with torch.no_grad():
            if self.bias:
                # Prepend an intercept column so that row 0 of the solution
                # is the bias direction.
                X = np.concatenate(
                    [np.ones([self.input.shape[0], 1]), self.input], axis=1
                )
            else:
                X = self.input

            # NaN hessians are replaced by 1.0 before dividing.
            h = torch.nan_to_num(self.h, nan=1.0)
            updated_B = ridge_regression(
                X, (self.g / h).detach().numpy(), self.lambd
            )

            if self.bias:
                # Row 0 belongs to the intercept column; the rest are the
                # feature coefficients, transposed to nn.Linear's
                # (output_dim, input_dim) weight layout.
                updated_weight_dir = updated_B[1:, :].T
                updated_bias_dir = updated_B[0:1, :].flatten()
                self.linear.weight -= self.lr * torch.Tensor(updated_weight_dir)
                self.linear.bias -= self.lr * torch.Tensor(updated_bias_dir)
            else:
                # No intercept column was added, so every row of the
                # solution is a feature coefficient and none may be dropped.
                updated_weight_dir = updated_B.T
                self.linear.weight -= self.lr * torch.Tensor(updated_weight_dir)

        self.g = None
        self.h = None
def ridge_regression(X, y, lambd):
    """Solve a ridge regression problem via the normal equations.

    Finds the coefficients minimizing::

        ||X beta - y||^2 + lambd ||beta||^2

    by forming ``X.T @ X + lambd * I`` and solving the resulting linear
    system with a Cholesky factorization.

    Args:
        X (np.ndarray): Design matrix of shape (n_samples, n_features).
        y (np.ndarray): Target values of shape (n_samples,) or
            (n_samples, n_targets).
        lambd (float): Ridge regularization parameter.

    Returns:
        np.ndarray: Fitted coefficients of shape (n_features,) for 1-D
        targets, or (n_features, n_targets) for 2-D targets.
    """
    # Unpacking two values also rejects a design matrix that is not 2-D.
    _, n_features = X.shape

    penalized_gram = X.T @ X + lambd * np.eye(n_features)
    moment = X.T @ y

    factorization = cho_factor(penalized_gram)
    return cho_solve(factorization, moment)