diff --git a/demo/guide-python/multioutput_reduced_gradient.py b/demo/guide-python/multioutput_reduced_gradient.py index 29e828806f7a..ebc5a9a6eab0 100644 --- a/demo/guide-python/multioutput_reduced_gradient.py +++ b/demo/guide-python/multioutput_reduced_gradient.py @@ -18,14 +18,13 @@ from typing import Tuple import numpy as np +import xgboost as xgb from sklearn.base import BaseEstimator from sklearn.datasets import make_regression - -import xgboost as xgb -from xgboost.objective import TreeObjective +from xgboost.objective import Objective -class LsObjMean(TreeObjective): +class LsObjMean(Objective): """Least squared error. Reduce the size of the gradient using mean value.""" def __init__(self, device: str) -> None: @@ -39,12 +38,12 @@ def __call__( if self.device == "cpu": hess = np.ones(grad.shape) return grad, hess - else: - import cupy as cp - hess = cp.ones(grad.shape) + import cupy as cp + + hess = cp.ones(grad.shape) - return cp.array(grad), cp.array(hess) + return cp.array(grad), cp.array(hess) def split_grad( self, iteration: int, grad: np.ndarray, hess: np.ndarray @@ -59,7 +58,7 @@ def split_grad( return sgrad, shess -def svd_class(device: str) -> BaseEstimator: +def svd_class() -> BaseEstimator: """One of the methods in the sketch boost paper.""" from sklearn.decomposition import TruncatedSVD @@ -77,10 +76,10 @@ def __init__(self, device: str) -> None: def split_grad( self, iteration: int, grad: np.ndarray, hess: np.ndarray ) -> Tuple[np.ndarray, np.ndarray]: - svd = svd_class(self.device) + svd = svd_class() if self.device == "cuda": - grad = grad.get() # type: ignore - hess = hess.get() # type: ignore + grad = grad.get() # type: ignore + hess = hess.get() # type: ignore svd.fit(grad) grad = svd.transform(grad) @@ -95,6 +94,7 @@ def split_grad( def main() -> None: + """Entry point to the demo, use `--device` to choose between CPU and GPU.""" parser = argparse.ArgumentParser() parser.add_argument("--device", choices=["cpu", "cuda"], default="cpu") args = parser.parse_args() diff --git a/doc/tutorials/multioutput.rst b/doc/tutorials/multioutput.rst index 9cc4be01e8bc..a025bc70c95b 100644 --- a/doc/tutorials/multioutput.rst +++ b/doc/tutorials/multioutput.rst @@ -106,17 +106,17 @@ function for leaf values. The `Sketch Boost` paper proposes using dimensionality on the gradient matrix. In practice, one can also define a different but related loss with a small gradient matrix for finding the tree structure. -To access this feature, create a custom objective that inherits from ``TreeObjective`` and +To access this feature, create a custom objective that inherits from ``Objective`` and implement the ``split_grad`` method. .. code-block:: python - from xgboost.objective import TreeObjective + from xgboost.objective import Objective from cuml.decomposition import TruncatedSVD import cupy as cp - class LsObj(TreeObjective): + class LsObj(Objective): def __call__(self, iteration: int, y_pred, dtrain): """Least squared error.""" y_true = dtrain.get_label() diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 07b18ab05c33..c282424854dc 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -87,7 +87,7 @@ is_pyarrow_available, py_str, ) -from .objective import Objective, TreeObjective, _grad_arrinf +from .objective import Objective, _BuiltInObjective, _grad_arrinf, _stringify if TYPE_CHECKING: from pandas import DataFrame as PdDataFrame @@ -2162,13 +2162,11 @@ def set_param( elif isinstance(params, str) and value is not None: params = [(params, value)] for key, val in cast(Iterable[Tuple[str, str]], params): - if isinstance(val, np.ndarray): - val = val.tolist() - elif hasattr(val, "__cuda_array_interface__") and hasattr(val, "tolist"): - val = val.tolist() if val is not None: _check_call( - _LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val))) + _LIB.XGBoosterSetParam( + self.handle, c_str(key), c_str(_stringify(val)) + ) ) def update( @@ -2198,7 +2196,7 @@ def update( raise TypeError(f"Invalid training matrix: {type(dtrain).__name__}") self._assign_dmatrix_features(dtrain) - if fobj is None: + if fobj is None or isinstance(fobj, _BuiltInObjective): _check_call( _LIB.XGBoosterUpdateOneIter( self.handle, ctypes.c_int(iteration), dtrain.handle @@ -2280,21 +2278,14 @@ def train_one_iter(grad: NumpyOrCupy, hess: NumpyOrCupy) -> None: vgrad: Optional[ArrayLike] vhess: Optional[ArrayLike] - if isinstance(fobj, TreeObjective): - # full gradient for leaf values + if isinstance(fobj, Objective): vgrad, vhess = fobj(iteration, y_pred, dtrain) - # Reduced gradient for split nodes split_grad = fobj.split_grad(iteration, vgrad, vhess) - # Switch the role of gradient if there's no split gradient but the tree - # objective is used. if split_grad is not None: sgrad, shess = split_grad else: sgrad, shess = vgrad, vhess vgrad, vhess = None, None - elif isinstance(fobj, Objective): - sgrad, shess = fobj(iteration, y_pred, dtrain) - vgrad, vhess = None, None else: # Plain callable sgrad, shess = fobj(y_pred, dtrain) diff --git a/python-package/xgboost/objective.py b/python-package/xgboost/objective.py index 8da01068009e..81fd7a5006c3 100644 --- a/python-package/xgboost/objective.py +++ b/python-package/xgboost/objective.py @@ -1,6 +1,13 @@ +# pylint: disable=missing-class-docstring """Experimental support for a new objective interface with target dimension reduction. + +This module exposes built-in objectives like ``reg:squarederror`` into the Python +interface, and enables users to specify parameters for some objectives like +``reg:quantileerror``. In addition, one can define a custom ``split_grad`` for training +vector-leaf models. + .. warning:: Do not use this module unless you want to participate in development. @@ -11,7 +18,7 @@ import warnings from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING, Any, Dict, Tuple import numpy as np @@ -44,18 +51,6 @@ def __call__( self, iteration: int, y_pred: ArrayLike, dtrain: "DMatrix" ) -> Tuple[ArrayLike, ArrayLike]: ... - -class TreeObjective(Objective): - """Base class for tree-specific custom objective functions. - - .. warning:: - - Do not use this class unless you want to participate in development. - - .. versionadded:: 3.2.0 - - """ - # pylint: disable=unused-argument def split_grad( self, iteration: int, grad: ArrayLike, hess: ArrayLike @@ -64,8 +59,168 @@ def split_grad( return None +class _BuiltInObjective: + """Base class for Python wrappers of built-in C++ objective functions.""" + + _name: str = "" + _KNOWN_PARAMS: Dict[str, str] = {} + + def __init__(self, **kwargs: Any) -> None: + self._params: Dict[str, Any] = {} + for py_name in self._KNOWN_PARAMS: + self._params[py_name] = kwargs.pop(py_name, None) + if kwargs: + raise TypeError(f"Unknown parameters for {self._name}: {list(kwargs)}") + + @property + def name(self) -> str: + """The objective name string.""" + return self._name + + # pylint: disable=missing-function-docstring + def flat_params(self) -> Dict[str, str]: + result: Dict[str, str] = {"objective": self._name} + for py_name, cpp_name in self._KNOWN_PARAMS.items(): + value = self._params[py_name] + if value is not None: + result[cpp_name] = _stringify(value) + return result + + +def _stringify(value: Any) -> str: + if isinstance(value, np.ndarray): + value = value.tolist() + elif hasattr(value, "__cuda_array_interface__") and hasattr(value, "tolist"): + value = value.tolist() + return str(value) + + +# Regression objectives + + +class RegSquaredError(_BuiltInObjective): + _name = "reg:squarederror" + _KNOWN_PARAMS = {"scale_pos_weight": "scale_pos_weight"} + + +class RegSquaredLogError(_BuiltInObjective): + _name = "reg:squaredlogerror" + + +class RegAbsoluteError(_BuiltInObjective): + _name = "reg:absoluteerror" + + +class RegPseudoHuberError(_BuiltInObjective): + _name = "reg:pseudohubererror" + _KNOWN_PARAMS = {"delta": "huber_slope"} + + +class RegQuantileError(_BuiltInObjective): + _name = "reg:quantileerror" + _KNOWN_PARAMS = {"alpha": "quantile_alpha"} + + +class RegExpectileError(_BuiltInObjective): + _name = "reg:expectileerror" + _KNOWN_PARAMS = {"alpha": "expectile_alpha"} + + +class RegTweedie(_BuiltInObjective): + _name = "reg:tweedie" + _KNOWN_PARAMS = {"variance_power": "tweedie_variance_power"} + + +class CountPoisson(_BuiltInObjective): + _name = "count:poisson" + _KNOWN_PARAMS = {"max_delta_step": "max_delta_step"} + + +# Logistic / classification objectives + + +class RegLogistic(_BuiltInObjective): + _name = "reg:logistic" + _KNOWN_PARAMS = {"scale_pos_weight": "scale_pos_weight"} + + +class BinaryLogistic(_BuiltInObjective): + _name = "binary:logistic" + _KNOWN_PARAMS = {"scale_pos_weight": "scale_pos_weight"} + + +class RegGamma(_BuiltInObjective): + _name = "reg:gamma" + _KNOWN_PARAMS = {"scale_pos_weight": "scale_pos_weight"} + + +class BinaryLogitRaw(_BuiltInObjective): + _name = "binary:logitraw" + _KNOWN_PARAMS = {"scale_pos_weight": "scale_pos_weight"} + + +class BinaryHinge(_BuiltInObjective): + _name = "binary:hinge" + + +# Multiclass objectives + + +class MultiSoftmax(_BuiltInObjective): + _name = "multi:softmax" + _KNOWN_PARAMS = {"num_class": "num_class"} + + +class MultiSoftprob(_BuiltInObjective): + _name = "multi:softprob" + _KNOWN_PARAMS = {"num_class": "num_class"} + + +# Survival objectives + + +class SurvivalAFT(_BuiltInObjective): + _name = "survival:aft" + _KNOWN_PARAMS = { + "distribution": "aft_loss_distribution", + "distribution_scale": "aft_loss_distribution_scale", + } + + +class SurvivalCox(_BuiltInObjective): + _name = "survival:cox" + + +# Ranking objectives + + +class RankNDCG(_BuiltInObjective): + _name = "rank:ndcg" + _KNOWN_PARAMS = { + "pair_method": "lambdarank_pair_method", + "num_pair_per_sample": "lambdarank_num_pair_per_sample", + "unbiased": "lambdarank_unbiased", + "exp_gain": "ndcg_exp_gain", + } + + +class RankPairwise(_BuiltInObjective): + _name = "rank:pairwise" + _KNOWN_PARAMS = { + "pair_method": "lambdarank_pair_method", + "num_pair_per_sample": "lambdarank_num_pair_per_sample", + } + + +class RankMAP(_BuiltInObjective): + _name = "rank:map" + _KNOWN_PARAMS = { + "pair_method": "lambdarank_pair_method", + "num_pair_per_sample": "lambdarank_num_pair_per_sample", + } + + def _grad_arrinf(array: NumpyOrCupy, n_samples: int) -> bytes: - # Can we check for __array_interface__ instead of a specific type instead? msg = ( "Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian." f" Got: {type(array)}" diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index b49f0ae85fcc..ad004eaab52b 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -71,6 +71,7 @@ _is_pandas_df, _is_polars_lazyframe, ) +from .objective import _BuiltInObjective from .training import train @@ -1358,11 +1359,13 @@ def fit( feature_types=feature_types, ) - if callable(self.objective): - obj: Optional[PlainObj] = _objective_decorator(self.objective) + obj: Optional[Union[PlainObj, _BuiltInObjective]] = None + if isinstance(self.objective, _BuiltInObjective): + obj = self.objective + params["objective"] = self.objective.name + elif callable(self.objective): + obj = _objective_decorator(self.objective) params["objective"] = "reg:squarederror" - else: - obj = None self._Booster = train( params, @@ -1764,12 +1767,13 @@ def fit( params = self.get_xgb_params() - if callable(self.objective): - obj: Optional[PlainObj] = _objective_decorator(self.objective) - # Use default value. Is it really not used ? + obj: Optional[Union[PlainObj, _BuiltInObjective]] = None + if isinstance(self.objective, _BuiltInObjective): + obj = self.objective + params["objective"] = self.objective.name + elif callable(self.objective): + obj = _objective_decorator(self.objective) params["objective"] = "binary:logistic" - else: - obj = None if self.n_classes_ > 2: # Switch to using a multiclass objective in the underlying XGB instance diff --git a/python-package/xgboost/testing/multi_target.py b/python-package/xgboost/testing/multi_target.py index e17218e806e1..b8656663cb9f 100644 --- a/python-package/xgboost/testing/multi_target.py +++ b/python-package/xgboost/testing/multi_target.py @@ -18,7 +18,7 @@ from .._typing import ArrayLike from ..compat import import_cupy from ..core import Booster, DMatrix, ExtMemQuantileDMatrix, QuantileDMatrix, build_info -from ..objective import Objective, TreeObjective +from ..objective import BinaryLogistic, Objective from ..sklearn import XGBClassifier from ..training import train from .data import IteratorForTest @@ -52,22 +52,24 @@ def run_multiclass(device: Device, learning_rate: Optional[float]) -> None: def run_multilabel(device: Device, learning_rate: Optional[float]) -> None: """Use vector leaf for multi-label classification models.""" X, y = make_multilabel_classification(128, random_state=2025) - clf = XGBClassifier( - debug_synchronize=True, - multi_strategy="multi_output_tree", - callbacks=[ResetStrategy()], - n_estimators=10, - device=device, - learning_rate=learning_rate, - ) - clf.fit(X, y, eval_set=[(X, y)]) - assert clf.objective == "binary:logistic" - assert non_increasing(clf.evals_result()["validation_0"]["logloss"]) - if learning_rate is not None and abs(learning_rate - 1.0) < 1e-5: - assert clf.evals_result()["validation_0"]["logloss"][-1] < 0.065 + for objective in ("binary:logistic", BinaryLogistic()): + clf = XGBClassifier( + debug_synchronize=True, + multi_strategy="multi_output_tree", + callbacks=[ResetStrategy()], + n_estimators=10, + device=device, + learning_rate=learning_rate, + objective=objective, + ) + clf.fit(X, y, eval_set=[(X, y)]) + assert clf.objective == "binary:logistic" + assert non_increasing(clf.evals_result()["validation_0"]["logloss"]) + if learning_rate is not None and abs(learning_rate - 1.0) < 1e-5: + assert clf.evals_result()["validation_0"]["logloss"][-1] < 0.065 - proba = clf.predict_proba(X) - assert proba.shape == y.shape + proba = clf.predict_proba(X) + assert proba.shape == y.shape def run_quantile_loss(device: Device, weighted: bool) -> None: @@ -144,7 +146,7 @@ def _array_impl(device: Device) -> ModuleType: return nda -class LsObj0(TreeObjective): +class LsObj0(Objective): """Split grad is the same as value grad.""" def __init__(self, device: Device) -> None: diff --git a/python-package/xgboost/testing/objective.py b/python-package/xgboost/testing/objective.py new file mode 100644 index 000000000000..befccf734c3c --- /dev/null +++ b/python-package/xgboost/testing/objective.py @@ -0,0 +1,357 @@ +"""Tests for the built-in objective Python interface.""" + +import json +import os +import pickle +import tempfile +from typing import Callable, Dict, List + +import numpy as np +import pytest + +from ..core import DMatrix +from ..objective import ( + BinaryHinge, + BinaryLogistic, + BinaryLogitRaw, + CountPoisson, + MultiSoftmax, + MultiSoftprob, + RankMAP, + RankNDCG, + RankPairwise, + RegAbsoluteError, + RegExpectileError, + RegGamma, + RegLogistic, + RegPseudoHuberError, + RegQuantileError, + RegSquaredError, + RegSquaredLogError, + RegTweedie, + SurvivalAFT, + SurvivalCox, + _BuiltInObjective, +) +from ..sklearn import XGBClassifier +from ..training import train +from . import make_ltr, make_regression +from .data import get_cancer +from .utils import Device + + +def check_train_regression_objectives(device: Device) -> None: + """Test training with regression objective classes.""" + X, y, _ = make_regression(100, 5, use_cupy=device == "cuda") + dm = DMatrix(X, label=y) + + for obj_inst, obj_name in [ + (RegPseudoHuberError(delta=5.0), "reg:pseudohubererror"), + (RegSquaredError(), "reg:squarederror"), + (RegAbsoluteError(), "reg:absoluteerror"), + ]: + bst = train({"device": device}, dm, num_boost_round=5, obj=obj_inst) + cfg = json.loads(bst.save_config()) + assert cfg["learner"]["objective"]["name"] == obj_name + assert obj_inst.name == obj_name + + bst = train( + {"device": device}, + dm, + num_boost_round=5, + obj=RegQuantileError(alpha=[0.1, 0.5, 0.9]), + ) + pred = bst.predict(dm) + assert pred.shape == (100, 3) + assert RegQuantileError().name == "reg:quantileerror" + + bst = train( + {"device": device}, + dm, + num_boost_round=5, + obj=RegExpectileError(alpha=[0.25, 0.75]), + ) + pred = bst.predict(dm) + assert pred.shape == (100, 2) + assert RegExpectileError().name == "reg:expectileerror" + + +def check_train_positive_objectives(device: Device) -> None: + """Test training with objectives requiring positive labels.""" + X, y, _ = make_regression(100, 5, use_cupy=device == "cuda") + if device == "cuda": + cp = pytest.importorskip("cupy") + y = cp.abs(y) + 0.1 + else: + y = np.abs(y) + 0.1 + dm = DMatrix(X, label=y) + + for obj_inst, obj_name in [ + (RegTweedie(variance_power=1.8), "reg:tweedie"), + (CountPoisson(max_delta_step=0.5), "count:poisson"), + (RegGamma(), "reg:gamma"), + (RegSquaredLogError(), "reg:squaredlogerror"), + ]: + bst = train({"device": device}, dm, num_boost_round=5, obj=obj_inst) + cfg = json.loads(bst.save_config()) + assert cfg["learner"]["objective"]["name"] == obj_name + assert obj_inst.name == obj_name + + +def check_train_classification_objectives(device: Device) -> None: + """Test training with classification objective classes.""" + X, y = get_cancer() + dm = DMatrix(X, label=y) + + for obj_inst, obj_name in [ + (RegLogistic(), "reg:logistic"), + (BinaryLogistic(scale_pos_weight=2.0), "binary:logistic"), + (BinaryLogitRaw(), "binary:logitraw"), + (BinaryHinge(), "binary:hinge"), + ]: + bst = train({"device": device}, dm, num_boost_round=5, obj=obj_inst) + cfg = json.loads(bst.save_config()) + assert cfg["learner"]["objective"]["name"] == obj_name + assert obj_inst.name == obj_name + + datasets = pytest.importorskip("sklearn.datasets") + X_mc, y_mc = datasets.load_digits(n_class=3, return_X_y=True) + dm_mc = DMatrix(X_mc, label=y_mc) + + obj: _BuiltInObjective = MultiSoftmax(num_class=3) + bst = train({"device": device}, dm_mc, num_boost_round=5, obj=obj) + cfg = json.loads(bst.save_config()) + assert cfg["learner"]["objective"]["name"] == "multi:softmax" + assert obj.name == "multi:softmax" + + obj = MultiSoftprob(num_class=3) + bst = train({"device": device}, dm_mc, num_boost_round=5, obj=obj) + pred = bst.predict(dm_mc) + assert pred.shape[1] == 3 + assert obj.name == "multi:softprob" + + +def check_train_survival_objectives(device: Device) -> None: + """Test training with survival objective classes.""" + rng = np.random.RandomState(42) + X = rng.randn(100, 5) + y_lower = np.abs(rng.randn(100)) + y_upper = y_lower + 1.0 + dm = DMatrix(X) + dm.set_info(label_lower_bound=y_lower, label_upper_bound=y_upper) + obj: _BuiltInObjective = SurvivalAFT( + distribution="logistic", distribution_scale=2.0 + ) + bst = train({"device": device}, dm, num_boost_round=5, obj=obj) + cfg = json.loads(bst.save_config()) + assert cfg["learner"]["objective"]["name"] == "survival:aft" + assert obj.name == "survival:aft" + + y_cox = np.abs(rng.randn(100)) + 0.1 + y_cox[:10] *= -1 + dm_cox = DMatrix(X, label=y_cox) + obj = SurvivalCox() + bst = train({"device": device}, dm_cox, num_boost_round=5, obj=obj) + cfg = json.loads(bst.save_config()) + assert cfg["learner"]["objective"]["name"] == "survival:cox" + assert obj.name == "survival:cox" + + +def check_train_ranking_objectives(device: Device) -> None: + """Test training with ranking objective classes.""" + X, y, qid, _ = make_ltr(100, 5, 4, max_rel=1) + dm = DMatrix(X, label=y, qid=qid) + + for obj_inst, obj_name in [ + (RankNDCG(pair_method="mean", exp_gain=False), "rank:ndcg"), + (RankPairwise(), "rank:pairwise"), + (RankMAP(), "rank:map"), + ]: + bst = train({"device": device}, dm, num_boost_round=5, obj=obj_inst) + cfg = json.loads(bst.save_config()) + assert cfg["learner"]["objective"]["name"] == obj_name + assert obj_inst.name == obj_name + + +def _regression_dm() -> DMatrix: + X, y, _ = make_regression(100, 5, use_cupy=False) + return DMatrix(X, label=y) + + +def _positive_dm() -> DMatrix: + X, y, _ = make_regression(100, 5, use_cupy=False) + return DMatrix(X, label=np.abs(y) + 0.1) + + +def _binary_dm() -> DMatrix: + X, y = get_cancer() + return DMatrix(X, label=y) + + +def _survival_dm() -> DMatrix: + rng = np.random.RandomState(42) + X = rng.randn(100, 5) + y_lower = np.abs(rng.randn(100)) + y_upper = y_lower + 1.0 + dm = DMatrix(X) + dm.set_info(label_lower_bound=y_lower, label_upper_bound=y_upper) + return dm + + +def _ranking_dm() -> DMatrix: + X, y, qid, _ = make_ltr(100, 5, 4, max_rel=1) + return DMatrix(X, label=y, qid=qid) + + +def equivalence_parameters() -> List: + """Return parametrized test cases.""" + return [ + # Regression + pytest.param( + RegPseudoHuberError(delta=10.0), + {"objective": "reg:pseudohubererror", "huber_slope": 10.0}, + _regression_dm, + id="reg:pseudohubererror", + ), + pytest.param( + RegQuantileError(alpha=[0.1, 0.5, 0.9]), + {"objective": "reg:quantileerror", "quantile_alpha": "[0.1,0.5,0.9]"}, + _regression_dm, + id="reg:quantileerror", + ), + pytest.param( + RegExpectileError(alpha=[0.25, 0.75]), + {"objective": "reg:expectileerror", "expectile_alpha": "[0.25,0.75]"}, + _regression_dm, + id="reg:expectileerror", + ), + # Positive labels + pytest.param( + RegTweedie(variance_power=1.8), + {"objective": "reg:tweedie", "tweedie_variance_power": 1.8}, + _positive_dm, + id="reg:tweedie", + ), + pytest.param( + CountPoisson(max_delta_step=0.5), + {"objective": "count:poisson", "max_delta_step": 0.5}, + _positive_dm, + id="count:poisson", + ), + # Binary classification + pytest.param( + BinaryLogistic(scale_pos_weight=2.0), + {"objective": "binary:logistic", "scale_pos_weight": 2.0}, + _binary_dm, + id="binary:logistic", + ), + # Survival + pytest.param( + SurvivalAFT(distribution="logistic", distribution_scale=2.0), + { + "objective": "survival:aft", + "aft_loss_distribution": "logistic", + "aft_loss_distribution_scale": 2.0, + }, + _survival_dm, + id="survival:aft", + ), + # Ranking + pytest.param( + RankNDCG(pair_method="mean", exp_gain=False), + { + "objective": "rank:ndcg", + "lambdarank_pair_method": "mean", + "ndcg_exp_gain": False, + }, + _ranking_dm, + id="rank:ndcg", + ), + ] + + +def check_equivalence( + device: Device, + obj_inst: _BuiltInObjective, + str_params: dict, + dm_factory: Callable[[], DMatrix], +) -> None: + """Test that class-based and string-based objectives produce identical results.""" + dm = dm_factory() + + bst_cls = train({"device": device}, dm, num_boost_round=10, obj=obj_inst) + bst_str = train({**str_params, "device": device}, dm, num_boost_round=10) + np.testing.assert_allclose(bst_cls.predict(dm), bst_str.predict(dm), atol=1e-6) + + +def check_default_metrics(device: Device) -> None: + """Test that built-in objectives set the correct default evaluation metrics.""" + X, y, _ = make_regression(100, 5, use_cupy=False) + dm = DMatrix(X, label=y) + + result: Dict[str, Dict] = {} + train( + {"device": device}, + dm, + num_boost_round=3, + evals=[(dm, "train")], + evals_result=result, + obj=RegPseudoHuberError(delta=1.0), + verbose_eval=False, + ) + assert "mphe" in result["train"] + + result = {} + train( + {"device": device}, + dm, + num_boost_round=3, + evals=[(dm, "train")], + evals_result=result, + obj=RegQuantileError(alpha=[0.5]), + verbose_eval=False, + ) + assert "quantile" in result["train"] + + +def check_sklearn_objectives(device: Device) -> None: + """Test objective classes with the scikit-learn interface.""" + X_bin, y_bin = get_cancer() + spw = 2.0 + clf = XGBClassifier( + objective=BinaryLogistic(scale_pos_weight=spw), + n_estimators=5, + device=device, + ) + clf.fit(X_bin, y_bin) + pred = clf.predict(X_bin) + assert set(pred).issubset({0, 1}) + + def chk_param(clf: XGBClassifier) -> None: + cfg = json.loads(clf.get_booster().save_config()) + assert ( + float(cfg["learner"]["objective"]["reg_loss_param"]["scale_pos_weight"]) + == spw + ) + + clf_1 = pickle.loads(pickle.dumps(clf)) + chk_param(clf_1) + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "clf.json") + clf.save_model(path) + clf_2 = XGBClassifier() + clf_2.load_model(path) + chk_param(clf_2) + + +def all_objective_checks() -> List[Callable[[Device], None]]: + """List of objective tests.""" + return [ + check_default_metrics, + check_sklearn_objectives, + check_train_classification_objectives, + check_train_positive_objectives, + check_train_ranking_objectives, + check_train_regression_objectives, + check_train_survival_objectives, + ] diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py index 00adca5a23c2..92136e6b6885 100644 --- a/python-package/xgboost/training.py +++ b/python-package/xgboost/training.py @@ -37,6 +37,7 @@ _deprecate_positional_args, _RefMixIn, ) +from .objective import _BuiltInObjective if TYPE_CHECKING: from pandas import DataFrame as PdDataFrame @@ -49,6 +50,26 @@ ) +def _check_obj( + obj: Optional[Union[PlainObj, _BuiltInObjective]], booster: Booster +) -> Optional[PlainObj]: + builtin_obj = None + if isinstance(obj, _BuiltInObjective): + builtin_obj = obj + obj = None + if builtin_obj is None: + return obj + + if hasattr(builtin_obj, "split_grad"): + raise NotImplementedError( + "`split_grad` is not yet supported for the built-in objectives." + ) + + for key, value in builtin_obj.flat_params().items(): + booster.set_param(key, value) + return obj + + @_deprecate_positional_args def train( params: Dict[str, Any], @@ -56,7 +77,7 @@ def train( num_boost_round: int = 10, *, evals: Optional[Sequence[Tuple[DMatrix, str]]] = None, - obj: Optional[PlainObj] = None, + obj: Optional[Union[PlainObj, _BuiltInObjective]] = None, maximize: Optional[bool] = None, early_stopping_rounds: Optional[int] = None, evals_result: Optional[TrainingCallback.EvalsLog] = None, @@ -181,7 +202,7 @@ def train( raise ValueError(_RefError) bst = Booster(params, [dtrain] + [d[0] for d in evals], model_file=xgb_model) - start_iteration = 0 + obj = _check_obj(obj, bst) if verbose_eval: verbose_eval = 1 if verbose_eval is True else verbose_eval @@ -194,6 +215,7 @@ def train( bst = cb_container.before_training(bst) + start_iteration = 0 for i in range(start_iteration, num_boost_round): if cb_container.before_iteration(bst, i, dtrain, evals): break @@ -578,6 +600,10 @@ def cv( # setup callbacks callbacks = [] if callbacks is None else copy.copy(list(callbacks)) + if isinstance(obj, _BuiltInObjective): + raise NotImplementedError( + "The objective interface is not supported by the CV function yet." + ) if verbose_eval: verbose_eval = 1 if verbose_eval is True else verbose_eval diff --git a/tests/python-gpu/test_gpu_objective.py b/tests/python-gpu/test_gpu_objective.py new file mode 100644 index 000000000000..d24a225f1c7f --- /dev/null +++ b/tests/python-gpu/test_gpu_objective.py @@ -0,0 +1,22 @@ +from typing import Callable + +import pytest +from xgboost.objective import _BuiltInObjective +from xgboost.testing.objective import ( + all_objective_checks, + check_equivalence, + equivalence_parameters, +) +from xgboost.testing.utils import Device + + +@pytest.mark.parametrize("obj_chk", all_objective_checks()) +def test_objectives(obj_chk: Callable[[Device], None]) -> None: + obj_chk("cuda") + + +@pytest.mark.parametrize("obj_inst,str_params,dm_factory", equivalence_parameters()) +def test_equivalence( + obj_inst: _BuiltInObjective, str_params: dict, dm_factory: Callable +) -> None: + check_equivalence("cuda", obj_inst, str_params, dm_factory) diff --git a/tests/python/test_objective.py b/tests/python/test_objective.py new file mode 100644 index 000000000000..136787732d27 --- /dev/null +++ b/tests/python/test_objective.py @@ -0,0 +1,22 @@ +from typing import Callable + +import pytest +from xgboost.objective import _BuiltInObjective +from xgboost.testing.objective import ( + all_objective_checks, + check_equivalence, + equivalence_parameters, +) +from xgboost.testing.utils import Device + + +@pytest.mark.parametrize("obj_chk", all_objective_checks()) +def test_objectives(obj_chk: Callable[[Device], None]) -> None: + obj_chk("cpu") + + +@pytest.mark.parametrize("obj_inst,str_params,dm_factory", equivalence_parameters()) +def test_equivalence( + obj_inst: _BuiltInObjective, str_params: dict, dm_factory: Callable +) -> None: + check_equivalence("cpu", obj_inst, str_params, dm_factory)